diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 17:20:00 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 17:20:00 +0000 |
commit | 8daa83a594a2e98f39d764422bfbdbc62c9efd44 (patch) | |
tree | 4099e8021376c7d8c05bdf8503093d80e9c7bad0 /lib/util/charset/tests | |
parent | Initial commit. (diff) | |
download | samba-8daa83a594a2e98f39d764422bfbdbc62c9efd44.tar.xz samba-8daa83a594a2e98f39d764422bfbdbc62c9efd44.zip |
Adding upstream version 2:4.20.0+dfsg. upstream/2%4.20.0+dfsg
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/util/charset/tests')
-rw-r--r-- | lib/util/charset/tests/charset.c | 342 | ||||
-rw-r--r-- | lib/util/charset/tests/convert_string.c | 2196 | ||||
-rw-r--r-- | lib/util/charset/tests/iconv.c | 495 | ||||
-rw-r--r-- | lib/util/charset/tests/util_unistr.c | 166 |
4 files changed, 3199 insertions, 0 deletions
diff --git a/lib/util/charset/tests/charset.c b/lib/util/charset/tests/charset.c new file mode 100644 index 0000000..547dc51 --- /dev/null +++ b/lib/util/charset/tests/charset.c @@ -0,0 +1,342 @@ +/* + Unix SMB/CIFS implementation. + test suite for the charcnv functions + + Copyright (C) Jelmer Vernooij 2007 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" +#include "torture/torture.h" + +#undef strcasecmp +#undef strncasecmp + +struct torture_suite *torture_local_charset(TALLOC_CTX *mem_ctx); + /* toupper_m(): c is expected to map to C; Z and the value 0xFFFF4565 are expected back unchanged. */ +static bool test_toupper_m(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, toupper_m('c'), 'C', "c"); + torture_assert_int_equal(tctx, toupper_m('Z'), 'Z', "z"); + torture_assert_int_equal(tctx, toupper_m(0xFFFF4565), 0xFFFF4565, "0xFFFF4565"); + return true; +} + /* tolower_m(): C is expected to map to c; z and the value 0xFFFF4565 are expected back unchanged. */ +static bool test_tolower_m(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, tolower_m('C'), 'c', "c"); + torture_assert_int_equal(tctx, tolower_m('z'), 'z', "z"); + torture_assert_int_equal(tctx, tolower_m(0xFFFF4565), 0xFFFF4565, "0xFFFF4565"); + return true; +} + /* codepoint_cmpi(): expects 0 for identical and for case-differing letters, 1 for (b, a) and -1 for (a, b). */ +static bool test_codepoint_cmpi(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, codepoint_cmpi('a', 'a'), 0, "same char"); + torture_assert_int_equal(tctx, codepoint_cmpi('A', 'a'), 0, "upcase version"); + torture_assert_int_equal(tctx, codepoint_cmpi('b', 'a'), 1, "right diff"); + 
torture_assert_int_equal(tctx, codepoint_cmpi('a', 'b'), -1, "right diff"); + return true; +} + /* strcasecmp() as visible after the #undef near the top of the file: pins the exact results 4 and 2 for differing strings and 0 for case-insensitive matches. */ +static bool test_strcasecmp(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, strcasecmp("foo", "bar"), 4, "different strings both lower"); + torture_assert_int_equal(tctx, strcasecmp("foo", "Bar"), 4, "different strings lower/upper"); + torture_assert_int_equal(tctx, strcasecmp("Foo", "bar"), 4, "different strings upper/lower"); + torture_assert_int_equal(tctx, strcasecmp("AFoo", "_bar"), 2, "different strings upper/lower"); + torture_assert_int_equal(tctx, strcasecmp("foo", "foo"), 0, "same case strings"); + torture_assert_int_equal(tctx, strcasecmp("foo", "Foo"), 0, "different case strings"); + + /* + * Note that strcasecmp() doesn't allow NULL arguments + */ + return true; +} + /* strcasecmp_m(): the same six string pairs and expected values as in test_strcasecmp, plus NULL arguments (-1, 1, 0) and two encodings of one name that are expected to compare as 38. */ +static bool test_strcasecmp_m(struct torture_context *tctx) +{ + /* file-{accented e} in iso8859-1 (0x2d is a hyphen, 0xe9 the accented e) */ + const char file_iso8859_1[7] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe9, 0 }; + /* file-{accented e} in utf8 (the accented e is the two bytes 0xc3 0xa9) */ + const char file_utf8[8] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xc3, 0xa9, 0 }; + torture_assert_int_equal(tctx, strcasecmp_m("foo", "bar"), 4, "different strings both lower"); + torture_assert_int_equal(tctx, strcasecmp_m("foo", "Bar"), 4, "different strings lower/upper"); + torture_assert_int_equal(tctx, strcasecmp_m("Foo", "bar"), 4, "different strings upper/lower"); + torture_assert_int_equal(tctx, strcasecmp_m("AFoo", "_bar"), 2, "different strings upper/lower"); + torture_assert_int_equal(tctx, strcasecmp_m("foo", "foo"), 0, "same case strings"); + torture_assert_int_equal(tctx, strcasecmp_m("foo", "Foo"), 0, "different case strings"); + torture_assert_int_equal(tctx, strcasecmp_m(NULL, "Foo"), -1, "one NULL"); + torture_assert_int_equal(tctx, strcasecmp_m("foo", NULL), 1, "other NULL"); + torture_assert_int_equal(tctx, strcasecmp_m(NULL, NULL), 0, "both NULL"); + torture_assert_int_equal(tctx, strcasecmp_m(file_iso8859_1, file_utf8), 38, + "file.{accented e} 
should differ"); + return true; +} + + /* strequal_m(): foo and Foo are expected to be equal; one NULL argument is expected to compare unequal and two NULLs equal. */ +static bool test_strequal_m(struct torture_context *tctx) +{ + torture_assert(tctx, !strequal_m("foo", "bar"), "different strings"); + torture_assert(tctx, strequal_m("foo", "foo"), "same case strings"); + torture_assert(tctx, strequal_m("foo", "Foo"), "different case strings"); + torture_assert(tctx, !strequal_m(NULL, "Foo"), "one NULL"); + torture_assert(tctx, !strequal_m("foo", NULL), "other NULL"); + torture_assert(tctx, strequal_m(NULL, NULL), "both NULL"); + return true; +} + /* strcsequal(): foo and Foo are expected to be unequal; one NULL argument is expected to compare unequal and two NULLs equal. */ +static bool test_strcsequal(struct torture_context *tctx) +{ + torture_assert(tctx, !strcsequal("foo", "bar"), "different strings"); + torture_assert(tctx, strcsequal("foo", "foo"), "same case strings"); + torture_assert(tctx, !strcsequal("foo", "Foo"), "different case strings"); + torture_assert(tctx, !strcsequal(NULL, "Foo"), "one NULL"); + torture_assert(tctx, !strcsequal("foo", NULL), "other NULL"); + torture_assert(tctx, strcsequal(NULL, NULL), "both NULL"); + return true; +} + /* string_replace_m(): rewrites a 6-byte buffer in place, expecting every b to become c, an absent character to leave the text alone, and a call with a NULL string to return normally. */ +static bool test_string_replace_m(struct torture_context *tctx) +{ + char data[6] = "bla"; + string_replace_m(data, 'b', 'c'); + torture_assert_str_equal(tctx, data, "cla", "first char replaced"); + memcpy(data, "bab", 4); + string_replace_m(data, 'b', 'c'); + torture_assert_str_equal(tctx, data, "cac", "other chars replaced"); + memcpy(data, "bba", 4); + string_replace_m(data, 'b', 'c'); + torture_assert_str_equal(tctx, data, "cca", "other chars replaced"); + memcpy(data, "blala", 6); + string_replace_m(data, 'o', 'c'); + torture_assert_str_equal(tctx, data, "blala", "no chars replaced"); + string_replace_m(NULL, 'b', 'c'); + return true; +} + /* strncasecmp() as visible after the #undef near the top of the file: length-limited comparisons, including a limit of 40 that exceeds both strings and a limit of 0. */ +static bool test_strncasecmp(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, strncasecmp("foo", "bar", 3), 4, "different strings both lower"); + torture_assert_int_equal(tctx, strncasecmp("foo", "Bar", 3), 4, "different strings lower/upper"); + torture_assert_int_equal(tctx, strncasecmp("Foo", "bar", 3), 4, 
"different strings upper/lower"); + torture_assert_int_equal(tctx, strncasecmp("AFoo", "_bar", 4), 2, "different strings upper/lower"); + torture_assert_int_equal(tctx, strncasecmp("foo", "foo", 3), 0, "same case strings"); + torture_assert_int_equal(tctx, strncasecmp("foo", "Foo", 3), 0, "different case strings"); + torture_assert_int_equal(tctx, strncasecmp("fool", "Foo", 3),0, "different case strings"); + torture_assert_int_equal(tctx, strncasecmp("fool", "Fool", 40), 0, "over size"); + torture_assert_int_equal(tctx, strncasecmp("BLA", "Fool", 0),0, "empty"); + + /* + * Note that strncasecmp() doesn't allow NULL arguments + */ + return true; +} + /* strncasecmp_m(): the same length-limited cases as in test_strncasecmp, plus NULL arguments (-1, 1, 0) and two encodings of one name compared over 6 bytes, expected to give 38. */ +static bool test_strncasecmp_m(struct torture_context *tctx) +{ + /* file-{accented e} in iso8859-1 (0x2d is a hyphen, 0xe9 the accented e) */ + const char file_iso8859_1[7] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe9, 0 }; + /* file-{accented e} in utf8 (the accented e is the two bytes 0xc3 0xa9) */ + const char file_utf8[8] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xc3, 0xa9, 0 }; + torture_assert_int_equal(tctx, strncasecmp_m("foo", "bar", 3), 4, "different strings both lower"); + torture_assert_int_equal(tctx, strncasecmp_m("foo", "Bar", 3), 4, "different strings lower/upper"); + torture_assert_int_equal(tctx, strncasecmp_m("Foo", "bar", 3), 4, "different strings upper/lower"); + torture_assert_int_equal(tctx, strncasecmp_m("AFoo", "_bar", 4), 2, "different strings upper/lower"); + torture_assert_int_equal(tctx, strncasecmp_m("foo", "foo", 3), 0, "same case strings"); + torture_assert_int_equal(tctx, strncasecmp_m("foo", "Foo", 3), 0, "different case strings"); + torture_assert_int_equal(tctx, strncasecmp_m("fool", "Foo", 3),0, "different case strings"); + torture_assert_int_equal(tctx, strncasecmp_m("fool", "Fool", 40), 0, "over size"); + torture_assert_int_equal(tctx, strncasecmp_m("BLA", "Fool", 0),0, "empty"); + torture_assert_int_equal(tctx, strncasecmp_m(NULL, "Foo", 3), -1, "one NULL"); + torture_assert_int_equal(tctx, strncasecmp_m("foo", NULL, 3), 1, "other NULL"); + torture_assert_int_equal(tctx, 
strncasecmp_m(NULL, NULL, 3), 0, "both NULL"); + torture_assert_int_equal(tctx, strncasecmp_m(file_iso8859_1, file_utf8, 6), 38, + "file.{accented e} should differ"); + return true; +} + /* next_token() is expected to return false when it is handed a NULL string pointer. */ +static bool test_next_token_null(struct torture_context *tctx) +{ + char buf[20]; + torture_assert(tctx, !next_token(NULL, buf, " ", 20), "null ptr works"); + return true; +} + /* next_token() with a space separator: expects foo, bar and bla in turn, with teststr advanced past each token, and then false. */ +static bool test_next_token(struct torture_context *tctx) +{ + const char *teststr = "foo bar bla"; + char buf[20]; + torture_assert(tctx, next_token(&teststr, buf, " ", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "foo", "token matches"); + torture_assert_str_equal(tctx, teststr, "bar bla", "ptr modified correctly"); + + torture_assert(tctx, next_token(&teststr, buf, " ", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "bar", "token matches"); + torture_assert_str_equal(tctx, teststr, "bla", "ptr modified correctly"); + + torture_assert(tctx, next_token(&teststr, buf, " ", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "bla", "token matches"); + torture_assert_str_equal(tctx, teststr, "", "ptr modified correctly"); + + torture_assert(tctx, !next_token(&teststr, buf, " ", 20), "finding token doesn't work"); + return true; +} + /* next_token() with a NULL separator argument: the tab, newline and space in the input are all expected to delimit tokens. */ +static bool test_next_token_implicit_sep(struct torture_context *tctx) +{ + const char *teststr = "foo\tbar\n bla"; + char buf[20]; + torture_assert(tctx, next_token(&teststr, buf, NULL, 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "foo", "token matches"); + torture_assert_str_equal(tctx, teststr, "bar\n bla", "ptr modified correctly"); + + torture_assert(tctx, next_token(&teststr, buf, NULL, 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "bar", "token matches"); + torture_assert_str_equal(tctx, teststr, " bla", "ptr modified correctly"); + + torture_assert(tctx, next_token(&teststr, buf, NULL, 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "bla", "token 
matches"); + torture_assert_str_equal(tctx, teststr, "", "ptr modified correctly"); + + torture_assert(tctx, !next_token(&teststr, buf, NULL, 20), "finding token doesn't work"); + return true; +} + /* next_token() with a comma separator: the leading comma is expected to be skipped and the space kept inside the single token. */ +static bool test_next_token_seps(struct torture_context *tctx) +{ + const char *teststr = ",foo bla"; + char buf[20]; + torture_assert(tctx, next_token(&teststr, buf, ",", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "foo bla", "token matches"); + torture_assert_str_equal(tctx, teststr, "", "ptr modified correctly"); + + torture_assert(tctx, !next_token(&teststr, buf, ",", 20), "finding token doesn't work"); + return true; +} + /* next_token(): a double-quoted section is expected to come back as one token with the quotes removed. */ +static bool test_next_token_quotes(struct torture_context *tctx) +{ + const char *teststr = "\"foo bar\" bla"; + char buf[20]; + torture_assert(tctx, next_token(&teststr, buf, " ", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "foo bar", "token matches"); + torture_assert_str_equal(tctx, teststr, "bla", "ptr modified correctly"); + + torture_assert(tctx, next_token(&teststr, buf, " ", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "bla", "token matches"); + torture_assert_str_equal(tctx, teststr, "", "ptr modified correctly"); + + torture_assert(tctx, !next_token(&teststr, buf, " ", 20), "finding token doesn't work"); + return true; +} + /* next_token(): with an opening quote that is never closed, the rest of the string is expected to come back as one token. */ +static bool test_next_token_quote_wrong(struct torture_context *tctx) +{ + const char *teststr = "\"foo bar bla"; + char buf[20]; + torture_assert(tctx, next_token(&teststr, buf, " ", 20), "finding token works"); + torture_assert_str_equal(tctx, buf, "foo bar bla", "token matches"); + torture_assert_str_equal(tctx, teststr, "", "ptr modified correctly"); + + torture_assert(tctx, !next_token(&teststr, buf, " ", 20), "finding token doesn't work"); + return true; +} + /* strlen_m(): expects 3 for foo, 6 for the string that contains the byte 0x83, and 0 for both the empty string and NULL. */ +static bool test_strlen_m(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, strlen_m("foo"), 3, "simple len"); + torture_assert_int_equal(tctx, strlen_m("foo\x83l"), 6, 
"extended len"); + torture_assert_int_equal(tctx, strlen_m(""), 0, "empty"); + torture_assert_int_equal(tctx, strlen_m(NULL), 0, "NULL"); + return true; +} + /* strlen_m_term(): expects one more than the strlen_m() values for the same strings (4, 7, 1), but still 0 for NULL. */ +static bool test_strlen_m_term(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, strlen_m_term("foo"), 4, "simple len"); + torture_assert_int_equal(tctx, strlen_m_term("foo\x83l"), 7, "extended len"); + torture_assert_int_equal(tctx, strlen_m_term(""), 1, "empty"); + torture_assert_int_equal(tctx, strlen_m_term(NULL), 0, "NULL"); + return true; +} + /* strlen_m_term_null(): same expectations as for strlen_m_term() except that the empty string is expected to give 0. */ +static bool test_strlen_m_term_null(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, strlen_m_term_null("foo"), 4, "simple len"); + torture_assert_int_equal(tctx, strlen_m_term_null("foo\x83l"), 7, "extended len"); + torture_assert_int_equal(tctx, strlen_m_term_null(""), 0, "empty"); + torture_assert_int_equal(tctx, strlen_m_term_null(NULL), 0, "NULL"); + return true; +} + /* strhaslower(): expected true for a and aB, false for B, the empty string and a digit. */ +static bool test_strhaslower(struct torture_context *tctx) +{ + torture_assert(tctx, strhaslower("a"), "one low char"); + torture_assert(tctx, strhaslower("aB"), "one low, one up char"); + torture_assert(tctx, !strhaslower("B"), "one up char"); + torture_assert(tctx, !strhaslower(""), "empty string"); + torture_assert(tctx, !strhaslower("3"), "one digit"); + return true; +} + /* strhasupper(): expected true for B and aB, false for a, the empty string and a digit. */ +static bool test_strhasupper(struct torture_context *tctx) +{ + torture_assert(tctx, strhasupper("B"), "one up char"); + torture_assert(tctx, strhasupper("aB"), "one low, one up char"); + torture_assert(tctx, !strhasupper("a"), "one low char"); + torture_assert(tctx, !strhasupper(""), "empty string"); + torture_assert(tctx, !strhasupper("3"), "one digit"); + return true; +} + /* count_chars_m(): expects 2 for o in foo, 0 for an empty or non-matching string, and 0 when searching for the NUL character. */ +static bool test_count_chars_m(struct torture_context *tctx) +{ + torture_assert_int_equal(tctx, count_chars_m("foo", 'o'), 2, "simple"); + torture_assert_int_equal(tctx, count_chars_m("", 'o'), 0, "empty"); + torture_assert_int_equal(tctx, count_chars_m("bla", 'o'), 0, "none"); + torture_assert_int_equal(tctx, 
count_chars_m("bla", '\0'), 0, "null"); + return true; +} + /* Creates the suite named charset on mem_ctx, registers each test function above as a simple test, and returns the suite. */ +struct torture_suite *torture_local_charset(TALLOC_CTX *mem_ctx) +{ + struct torture_suite *suite = torture_suite_create(mem_ctx, "charset"); + + torture_suite_add_simple_test(suite, "toupper_m", test_toupper_m); + torture_suite_add_simple_test(suite, "tolower_m", test_tolower_m); + torture_suite_add_simple_test(suite, "codepoint_cmpi", test_codepoint_cmpi); + torture_suite_add_simple_test(suite, "strcasecmp", test_strcasecmp); + torture_suite_add_simple_test(suite, "strcasecmp_m", test_strcasecmp_m); + torture_suite_add_simple_test(suite, "strequal_m", test_strequal_m); + torture_suite_add_simple_test(suite, "strcsequal", test_strcsequal); + torture_suite_add_simple_test(suite, "string_replace_m", test_string_replace_m); + torture_suite_add_simple_test(suite, "strncasecmp", test_strncasecmp); + torture_suite_add_simple_test(suite, "strncasecmp_m", test_strncasecmp_m); + torture_suite_add_simple_test(suite, "next_token", test_next_token); + torture_suite_add_simple_test(suite, "next_token_null", test_next_token_null); + torture_suite_add_simple_test(suite, "next_token_implicit_sep", test_next_token_implicit_sep); + torture_suite_add_simple_test(suite, "next_token_quotes", test_next_token_quotes); + torture_suite_add_simple_test(suite, "next_token_seps", test_next_token_seps); + torture_suite_add_simple_test(suite, "next_token_quote_wrong", test_next_token_quote_wrong); + torture_suite_add_simple_test(suite, "strlen_m", test_strlen_m); + torture_suite_add_simple_test(suite, "strlen_m_term", test_strlen_m_term); + torture_suite_add_simple_test(suite, "strlen_m_term_null", test_strlen_m_term_null); + torture_suite_add_simple_test(suite, "strhaslower", test_strhaslower); + torture_suite_add_simple_test(suite, "strhasupper", test_strhasupper); + torture_suite_add_simple_test(suite, "count_chars_m", test_count_chars_m); + + return suite; +} diff --git a/lib/util/charset/tests/convert_string.c 
b/lib/util/charset/tests/convert_string.c new file mode 100644 index 0000000..6400ce1 --- /dev/null +++ b/lib/util/charset/tests/convert_string.c @@ -0,0 +1,2196 @@ +/* + Unix SMB/CIFS implementation. + test suite for the charcnv functions + + Copyright (C) Andrew Bartlett 2011 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" +#include "torture/torture.h" +#include "lib/util/charset/charset.h" +#include "param/param.h" +#include "lib/util/base64.h" + +struct torture_suite *torture_local_convert_string_handle(TALLOC_CTX *mem_ctx); +struct torture_suite *torture_local_string_case_handle(TALLOC_CTX *mem_ctx); +struct torture_suite *torture_local_convert_string(TALLOC_CTX *mem_ctx); +struct torture_suite *torture_local_string_case(TALLOC_CTX *mem_ctx); + +/* The text below is in ancient Greek, a Latin-charset transliteration of + * the Greek, and an English translation. It is from the Apology by Plato and sourced from + * http://en.wikipedia.org/w/index.php?title=Ancient_Greek&oldid=421361065#Example_text + */ + +const char *plato_english_ascii = + "What you, men of Athens, have learned from my accusers, I do not" + " know: but I, for my part, nearly forgot who I was thanks to them since" + " they spoke so persuasively. 
And yet, of the truth, they have spoken," + " one might say, nothing at all."; + +const char *plato_english_utf16le_base64 = + "VwBoAGEAdAAgAHkAbwB1ACwAIABtAGUAbgAgAG8AZgAgAEEAdABoAGUAbgBzACwAIABoAGEAdgBl" + "ACAAbABlAGEAcgBuAGUAZAAgAGYAcgBvAG0AIABtAHkAIABhAGMAYwB1AHMAZQByAHMALAAgAEkA" + "IABkAG8AIABuAG8AdAAgAGsAbgBvAHcAOgAgAGIAdQB0ACAASQAsACAAZgBvAHIAIABtAHkAIABw" + "AGEAcgB0ACwAIABuAGUAYQByAGwAeQAgAGYAbwByAGcAbwB0ACAAdwBoAG8AIABJACAAdwBhAHMA" + "IAB0AGgAYQBuAGsAcwAgAHQAbwAgAHQAaABlAG0AIABzAGkAbgBjAGUAIAB0AGgAZQB5ACAAcwBw" + "AG8AawBlACAAcwBvACAAcABlAHIAcwB1AGEAcwBpAHYAZQBsAHkALgAgAEEAbgBkACAAeQBlAHQA" + "LAAgAG8AZgAgAHQAaABlACAAdAByAHUAdABoACwAIAB0AGgAZQB5ACAAaABhAHYAZQAgAHMAcABv" + "AGsAZQBuACwAIABvAG4AZQAgAG0AaQBnAGgAdAAgAHMAYQB5ACwAIABuAG8AdABoAGkAbgBnACAA" + "YQB0ACAAYQBsAGwALgA="; + +static const char *plato_utf8_base64 = + "4b2Nz4TOuSDOvOG9ss69IOG9kc68zrXhv5bPgiwg4b2mIOG8hM69zrTPgc61z4IgzobOuM63zr3O" + "seG/ls6/zrksIM+AzrXPgM+Mzr3OuM6xz4TOtSDhvZHPgOG9uCDPhOG/ts69IOG8kM684b+2zr0g" + "zrrOsc+EzrfOs8+Mz4HPic69LCDOv+G9kM66IM6/4by2zrTOsTog4byQzrPhvbwgzrQnIM6/4b2W" + "zr0gzrrOseG9tiDOseG9kM+E4b24z4Ig4b2Rz4AnIM6x4b2Qz4Thv7bOvSDhvYDOu86vzrPOv8+F" + "IOG8kM68zrHPhc+Ezr/hv6Yg4byQz4DOtc67zrHOuM+MzrzOt869LCDOv+G9lc+Ez4kgz4DOuc64" + "zrHOveG/ts+CIOG8lM67zrXOs86/zr0uIM6azrHOr8+Ezr/OuSDhvIDOu863zrjOrc+CIM6zzrUg" + "4b2hz4Ig4byUz4DOv8+CIM614bywz4DOteG/ls69IM6/4b2QzrThvbLOvSDOteG8sM+Bzq7Ous6x" + "z4POuc69Lg=="; + +static const char *plato_utf16le_base64 = + "TR/EA7kDIAC8A3IfvQMgAFEfvAO1A9YfwgMsACAAZh8gAAQfvQO0A8EDtQPCAyAAhgO4A7cDvQOx" + "A9YfvwO5AywAIADAA7UDwAPMA70DuAOxA8QDtQMgAFEfwAN4HyAAxAP2H70DIAAQH7wD9h+9AyAA" + "ugOxA8QDtwOzA8wDwQPJA70DLAAgAL8DUB+6AyAAvwM2H7QDsQM6ACAAEB+zA3wfIAC0AycAIAC/" + "A1YfvQMgALoDsQN2HyAAsQNQH8QDeB/CAyAAUR/AAycAIACxA1AfxAP2H70DIABAH7sDrwOzA78D" + "xQMgABAfvAOxA8UDxAO/A+YfIAAQH8ADtQO7A7EDuAPMA7wDtwO9AywAIAC/A1UfxAPJAyAAwAO5" + "A7gDsQO9A/YfwgMgABQfuwO1A7MDvwO9Ay4AIACaA7EDrwPEA78DuQMgAAAfuwO3A7gDrQPCAyAA" + 
"swO1AyAAYR/CAyAAFB/AA78DwgMgALUDMB/AA7UD1h+9AyAAvwNQH7QDch+9AyAAtQMwH8EDrgO6" + "A7EDwwO5A70DLgA="; + +static const char *plato_latin_utf8_base64 = + "SMOzdGkgbcOobiBodW1lw65zLCDDtCDDoW5kcmVzIEF0aMSTbmHDrm9pLCBwZXDDs250aGF0ZSBo" + "dXDDsiB0w7RuIGVtw7RuIGthdMSTZ8OzcsWNbiwgb3VrIG/DrmRhOiBlZ+G5kSBkJyBvw7tuIGth" + "w6wgYXV0w7JzIGh1cCcgYXV0xY1uIG9sw61nb3UgZW1hdXRvw7sgZXBlbGF0aMOzbcSTbiwgaG/D" + "unTFjSBwaXRoYW7DtHMgw6lsZWdvbi4gS2HDrXRvaSBhbMSTdGjDqXMgZ2UgaMWNcyDDqXBvcyBl" + "aXBlw65uIG91ZMOobiBlaXLhuJdrYXNpbi4="; + +static const char *plato_latin_utf16le_base64 = + "SADzAHQAaQAgAG0A6ABuACAAaAB1AG0AZQDuAHMALAAgAPQAIADhAG4AZAByAGUAcwAgAEEAdABo" + "ABMBbgBhAO4AbwBpACwAIABwAGUAcADzAG4AdABoAGEAdABlACAAaAB1AHAA8gAgAHQA9ABuACAA" + "ZQBtAPQAbgAgAGsAYQB0ABMBZwDzAHIATQFuACwAIABvAHUAawAgAG8A7gBkAGEAOgAgAGUAZwBR" + "HiAAZAAnACAAbwD7AG4AIABrAGEA7AAgAGEAdQB0APIAcwAgAGgAdQBwACcAIABhAHUAdABNAW4A" + "IABvAGwA7QBnAG8AdQAgAGUAbQBhAHUAdABvAPsAIABlAHAAZQBsAGEAdABoAPMAbQATAW4ALAAg" + "AGgAbwD6AHQATQEgAHAAaQB0AGgAYQBuAPQAcwAgAOkAbABlAGcAbwBuAC4AIABLAGEA7QB0AG8A" + "aQAgAGEAbAATAXQAaADpAHMAIABnAGUAIABoAE0BcwAgAOkAcABvAHMAIABlAGkAcABlAO4AbgAg" + "AG8AdQBkAOgAbgAgAGUAaQByABceawBhAHMAaQBuAC4A"; + +static const char *gd_utf8_base64 = "R8O8bnRoZXIgRGVzY2huZXI="; +static const char *gd_utf8_upper_base64 = "R8OcTlRIRVIgREVTQ0hORVI="; +static const char *gd_utf8_lower_base64 = "Z8O8bnRoZXIgZGVzY2huZXI="; +static const char *gd_cp850_base64 = "R4FudGhlciBEZXNjaG5lcg=="; +static const char *gd_cp850_upper_base64 = "R5pOVEhFUiBERVNDSE5FUg=="; +static const char *gd_cp850_lower_base64 = "Z4FudGhlciBkZXNjaG5lcg=="; +static const char *gd_iso8859_1_base64 = "R/xudGhlciBEZXNjaG5lcg=="; +static const char *gd_utf16le_base64 = "RwD8AG4AdABoAGUAcgAgAEQAZQBzAGMAaABuAGUAcgA="; +/* täst */ +static const char *utf8_nfc_base64 = "dMOkc3QA"; +/* täst, where ä = a + combining diaeresis */ +static const char *utf8_nfd_base64 = "dGHMiHN0AA=="; + +/* + * These cp850 bytes correspond to high Unicode codes, stretching out 
to + * 3-byte sequences in utf-8. + */ +static const char *cp850_high_points = "\xb9\xba\xbb\xbc\xcd\xce"; +static const char *utf8_high_points = "╣║╗╝═╬"; + /* Round-trips the six cp850_high_points bytes: converts CH_DOS to CH_UTF8 through a testing handle created from CP850 and UTF8, expects three output bytes per input byte matching utf8_high_points, then converts back and expects the original bytes. */ +static bool test_cp850_high_points(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle = NULL; + DATA_BLOB cp850 = data_blob_string_const(cp850_high_points); + DATA_BLOB utf8; + DATA_BLOB cp850_return; + + iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8", + lpcfg_parm_bool(tctx->lp_ctx, + NULL, + "iconv", + "use_builtin_handlers", + true)); + + torture_assert(tctx, iconv_handle, "creating iconv handle"); + + torture_assert(tctx, + convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_UTF8, + cp850.data, cp850.length, + (void *)&utf8.data, &utf8.length), + "conversion from CP850 to UTF-8"); + + torture_assert(tctx, utf8.length == cp850.length * 3, + "CP850 high bytes expand to the right size"); + + torture_assert(tctx, + memcmp(utf8.data, utf8_high_points, utf8.length) == 0, + "cp850 converted to utf8 matches expected value"); + + torture_assert(tctx, + convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_DOS, + utf8.data, utf8.length, + (void *)&cp850_return.data, + &cp850_return.length), + "conversion from UTF-8 back to CP850"); + + torture_assert(tctx, data_blob_cmp(&cp850_return, &cp850) == 0, + "UTF-8 returned to CP850 matches the original"); + return true; +} + + /* Exercises convert_string_talloc_handle(), convert_string_error_handle() and strlen_m_ext_handle() on the decoded gd_* sample blobs, using a testing handle created from ISO-8859-1 and CP850; the assertion messages refer to these as the dos charset and the unix charset respectively. */ +static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB gd_utf8 = base64_decode_data_blob(gd_utf8_base64); + DATA_BLOB gd_cp850 = base64_decode_data_blob(gd_cp850_base64); + DATA_BLOB gd_iso8859_1 = base64_decode_data_blob(gd_iso8859_1_base64); + DATA_BLOB gd_utf16le = base64_decode_data_blob(gd_utf16le_base64); + DATA_BLOB gd_output; + DATA_BLOB gd_output2; + + talloc_steal(tctx, gd_utf8.data); + talloc_steal(tctx, gd_cp850.data); + talloc_steal(tctx, gd_iso8859_1.data); + talloc_steal(tctx, gd_utf16le.data); + + 
iconv_handle = get_iconv_testing_handle(tctx, "ISO-8859-1", "CP850", + lpcfg_parm_bool(tctx->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + torture_assert(tctx, iconv_handle, "getting iconv handle"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_DOS, + gd_utf8.data, gd_utf8.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from UTF8 to (dos charset) ISO-8859-1"); + torture_assert_data_blob_equal(tctx, gd_output, gd_iso8859_1, "conversion from UTF8 to (dos charset) ISO-8859-1 incorrect"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_DOS, + gd_utf8.data, gd_utf8.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length), + "conversion from UTF8 to (dos charset) ISO-8859-1"); + torture_assert_data_blob_equal(tctx, gd_output, gd_iso8859_1, "conversion from UTF8 to (dos charset) ISO-8859-1 incorrect"); + + /* Short output handling confirmation */ + gd_output.length = 1; + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_DOS, + gd_utf8.data, gd_utf8.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF8 to (dos charset) ISO-8859-1 should fail due to too short"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to (dos charset) ISO-8859-1 should fail E2BIG"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); + torture_assert_data_blob_equal(tctx, gd_output, data_blob_string_const("G"), "conversion from UTF8 to (dos charset) ISO-8859-1 incorrect"); + + /* Short output handling confirmation */ + gd_output.length = 2; + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_DOS, + gd_utf8.data, gd_utf8.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF8 to (dos charset) ISO-8859-1 should fail due to too short"); + torture_assert_errno_equal(tctx, 
E2BIG, "conversion from UTF8 to (dos charset) ISO-8859-1 should fail E2BIG"); + torture_assert_int_equal(tctx, gd_output.length, 2, "Should only get 2 char of output"); + + /* Short input handling confirmation: only the first 2 bytes of gd_utf8 are passed in; the errno checked below is EILSEQ although the message text says EINVAL */ + gd_output.length = gd_iso8859_1.length; + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_DOS, + gd_utf8.data, 2, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF8 to (dos charset) ISO-8859-1 should fail due to too short"); + torture_assert_errno_equal(tctx, EILSEQ, "conversion from short UTF8 to (dos charset) ISO-8859-1 should fail EINVAL"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); + + /* Short output handling confirmation */ + gd_output.length = 1; + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le.data, gd_utf16le.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF16 to UTF8 should fail due to too short"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16 to (utf8 charset) ISO-8859-1 should fail E2BIG"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); + torture_assert_data_blob_equal(tctx, gd_output, data_blob_string_const("G"), "conversion from UTF16 to UTF8 incorrect"); + + /* Short output handling confirmation */ + gd_output.length = 3; + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le.data, gd_utf16le.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF16 to UTF8 should fail due to too short"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16 to (utf8 charset) ISO-8859-1 should fail E2BIG"); + torture_assert_int_equal(tctx, gd_output.length, 3, "Should get 3 bytes output for UTF8"); + + /* Short input handling confirmation: only the first 3 bytes of gd_utf16le are passed in and EINVAL is the errno expected */ + gd_output.length 
= gd_utf8.length; + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le.data, 3, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF16 to UTF8 should fail due to too short"); + torture_assert_errno_equal(tctx, EINVAL, "conversion from short UTF16 to UTF8 should fail EINVAL"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UNIX, + gd_utf8.data, gd_utf8.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from UTF8 to (unix charset) CP850"); + torture_assert_data_blob_equal(tctx, gd_output, gd_cp850, "conversion from UTF8 to (unix charset) CP850 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UTF8, + gd_utf8.data, gd_utf8.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from UTF8 to UTF8"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF8 to UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_DOS, + gd_utf16le.data, gd_utf16le.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from UTF16LE to (dos charset) ISO-8859-1"); + torture_assert_data_blob_equal(tctx, gd_output, gd_iso8859_1, "conversion from UTF16LE to (dos charset) ISO-8859-1 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_UTF16LE, + gd_output.data, gd_output.length, + (void *)&gd_output2.data, &gd_output2.length), + "round trip conversion from (dos charset) ISO-8859-1 back to UTF16LE"); + torture_assert_data_blob_equal(tctx, gd_output2, gd_utf16le, "round trip conversion from (dos charset) ISO-8859-1 back to UTF16LE"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UNIX, + gd_utf16le.data, 
gd_utf16le.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from UTF16LE to (unix charset) CP850"); + torture_assert_data_blob_equal(tctx, gd_output, gd_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le.data, gd_utf16le.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from UTF16LE to UTF8"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_DOS, + gd_iso8859_1.data, gd_iso8859_1.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from (dos charset) ISO-8859-1 to (dos charset) ISO-8859-1"); + torture_assert_data_blob_equal(tctx, gd_output, gd_iso8859_1, "conversion from UTF16LE to (dos charset) ISO-8859-1 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_UNIX, + gd_iso8859_1.data, gd_iso8859_1.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from (dos charset) ISO-8859-1 to (unix charset) CP850"); + torture_assert_data_blob_equal(tctx, gd_output, gd_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_UTF8, + gd_iso8859_1.data, gd_iso8859_1.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from (dos charset) ISO-8859-1 to UTF8"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_UTF16LE, + gd_iso8859_1.data, gd_iso8859_1.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from (dos charset) ISO-8859-1 to UTF16LE"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le, "conversion from (dos 
charset) ISO-8859-1 to UTF16LE"); + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)gd_iso8859_1.data, + CH_DOS, CH_UTF16LE), + gd_output.length / 2, + "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to UTF8 and back again"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_UTF8, + gd_iso8859_1.data, gd_iso8859_1.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from (dos charset) ISO-8859-1 to UTF8"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from (dos charset) ISO-8859-1 to UTF8"); + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)gd_iso8859_1.data, + CH_DOS, CH_UTF8), + gd_output.length, + "checking strlen_m_ext of conversion from (dos charset) ISO-8859-1 to UTF8"); + return true; +} + +static bool test_gd_minus_1_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB gd_utf8 = base64_decode_data_blob(gd_utf8_base64); + DATA_BLOB gd_cp850 = base64_decode_data_blob(gd_cp850_base64); + DATA_BLOB gd_utf16le = base64_decode_data_blob(gd_utf16le_base64); + DATA_BLOB gd_output; + DATA_BLOB gd_utf8_terminated; + DATA_BLOB gd_cp850_terminated; + DATA_BLOB gd_utf16le_terminated; + + talloc_steal(tctx, gd_utf8.data); + talloc_steal(tctx, gd_cp850.data); + talloc_steal(tctx, gd_utf16le.data); + + iconv_handle = get_iconv_testing_handle(tctx, "CP850", "CP850", + lpcfg_parm_bool(tctx->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + torture_assert(tctx, iconv_handle, "getting iconv handle"); + + gd_utf8_terminated = data_blob_talloc(tctx, NULL, gd_utf8.length + 1); + memcpy(gd_utf8_terminated.data, gd_utf8.data, gd_utf8.length); + gd_utf8_terminated.data[gd_utf8.length] = '\0'; + + gd_cp850_terminated = data_blob_talloc(tctx, NULL, gd_cp850.length + 1); + memcpy(gd_cp850_terminated.data, gd_cp850.data, gd_cp850.length); + 
gd_cp850_terminated.data[gd_cp850.length] = '\0'; + +/* The UTF16LE copy is terminated with two zero bytes */ gd_utf16le_terminated = data_blob_talloc(tctx, NULL, gd_utf16le.length + 2); + memcpy(gd_utf16le_terminated.data, gd_utf16le.data, gd_utf16le.length); + gd_utf16le_terminated.data[gd_utf16le.length] = '\0'; + gd_utf16le_terminated.data[gd_utf16le.length + 1] = '\0'; + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + +/* Source length -1 with ample output space: the result must equal the terminated UTF16LE blob */ torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated"); + +/* Output limit equal to the unterminated length: must fail with E2BIG, leaving the unterminated data */ gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_utf16le.length, &gd_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le, "conversion from UTF8 to UTF16LE null terminated"); + +/* Output limit one byte below the unterminated length: E2BIG again */ gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_utf16le.length - 1, &gd_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + +/* Output limit two bytes below the unterminated length: E2BIG again */ gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_utf16le.length - 2, &gd_output.length) == false, +
"conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + +/* Same four cases in the other direction, UTF16LE to UTF8 */ gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_utf8.length, &gd_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to UTF8 null terminated"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_utf8.length - 1, &gd_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_utf8.length - 2, &gd_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + gd_output =
data_blob_talloc(tctx, NULL, gd_cp850.length + 10); + +/* UTF16LE to CH_DOS: the result is checked against the terminated CP850 blob */ torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_DOS, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to CP850 (dos) null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_cp850_terminated, "conversion from UTF16LE to CP850 (dos) null terminated"); + + /* Now NUL-terminate the strings early, then confirm that the NUL is not skipped and nothing after it is converted */ + gd_utf8_terminated.data[3] = '\0'; + gd_utf8_terminated.length = 4; /* length adjusted for the comparison only */ + + gd_cp850_terminated.data[2] = '\0'; + gd_cp850_terminated.length = 3; /* length adjusted for the comparison only */ + + gd_utf16le_terminated.data[4] = '\0'; + gd_utf16le_terminated.data[5] = '\0'; + gd_utf16le_terminated.length = 6; /* length adjusted for the comparison only */ + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated early"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_DOS, CH_UTF16LE, + gd_cp850_terminated.data, -1, + (void *)gd_output.data,
gd_output.length, &gd_output.length), + "conversion from CP850 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from CP850 to UTF16LE null terminated early"); + + gd_output = data_blob_talloc(tctx, NULL, gd_cp850.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_DOS, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to CP850 (dos) null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_cp850_terminated, "conversion from UTF16LE to CP850 (dos) null terminated early"); + + /* Now NUL-terminate the strings particularly early, then confirm that the NUL is not skipped and nothing after it is converted */ + gd_utf8_terminated.data[1] = '\0'; + gd_utf8_terminated.length = 2; /* length adjusted for the comparison only */ + + gd_utf16le_terminated.data[2] = '\0'; + gd_utf16le_terminated.data[3] = '\0'; + gd_utf16le_terminated.length = 4; /* length adjusted for the comparison only */ + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated very early"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated very early"); + + return true; +} + +/* + * Conversions of the test string into the dos charset of an ASCII/UTF8 handle. + * Every conversion here is expected to fail; where the output is checked, + * exactly one byte (G) has been produced. + */ +static bool test_gd_ascii_handle(struct
torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB gd_utf8 = base64_decode_data_blob(gd_utf8_base64); + DATA_BLOB gd_cp850 = base64_decode_data_blob(gd_cp850_base64); + DATA_BLOB gd_iso8859_1 = base64_decode_data_blob(gd_iso8859_1_base64); + DATA_BLOB gd_utf16le = base64_decode_data_blob(gd_utf16le_base64); + DATA_BLOB gd_output; + + talloc_steal(tctx, gd_utf8.data); + talloc_steal(tctx, gd_cp850.data); + talloc_steal(tctx, gd_iso8859_1.data); + talloc_steal(tctx, gd_utf16le.data); + + iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8", + lpcfg_parm_bool(tctx->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + torture_assert(tctx, iconv_handle, "getting iconv handle"); + + /* The allocating variant must fail outright */ + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_DOS, + gd_utf8.data, gd_utf8.length, + (void *)&gd_output.data, &gd_output.length) == false, + "conversion from UTF8 to (dos charset) ASCII should fail"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length); + + /* Full-size buffer: one byte is produced, then the call fails with EILSEQ */ + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_DOS, + gd_utf8.data, gd_utf8.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF8 to (dos charset) ASCII should fail"); + torture_assert_errno_equal(tctx, EILSEQ, "conversion from UTF8 to (dos charset) ASCII should fail EILSEQ"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); + torture_assert_data_blob_equal(tctx, gd_output, data_blob_string_const("G"), "partial conversion from UTF8 to (dos charset) ASCII incorrect"); + + /* Short output handling confirmation: room for one byte only, so errno is E2BIG */ + gd_output.length = 1; + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_DOS, + gd_utf8.data, gd_utf8.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF8 to (dos charset) ASCII should fail due to too short"); +
torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to (dos charset) ASCII too short"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); + torture_assert_data_blob_equal(tctx, gd_output, data_blob_string_const("G"), "conversion from UTF8 to (dos charset) ASCII incorrect"); + + /* Short output handling confirmation: room for two bytes, still one byte of output and errno EILSEQ */ + gd_output.length = 2; + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_DOS, + gd_utf8.data, gd_utf8.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF8 to (dos charset) ASCII should fail due to illegal sequence"); + torture_assert_errno_equal(tctx, EILSEQ, "conversion from UTF8 to (dos charset) ASCII should fail EILSEQ"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); + + /* Short input handling confirmation: only the first two source bytes are offered */ + gd_output.length = gd_utf8.length; + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_DOS, + gd_utf8.data, 2, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF8 to (dos charset) ASCII should fail due to too short"); + torture_assert_errno_equal(tctx, EILSEQ, "conversion from short UTF8 to (dos charset) ASCII should fail EILSEQ"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); + return true; +} + +/* + * Converts the English test text between UTF8, UTF16LE and the dos/unix + * charsets of an ISO-8859-1/CP850 handle and compares each result. + */ +static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB plato_english_utf8 = data_blob_string_const(plato_english_ascii); + DATA_BLOB plato_english_cp850 = plato_english_utf8; + DATA_BLOB plato_english_iso8859_1 = plato_english_utf8; + DATA_BLOB plato_english_utf16le = base64_decode_data_blob(plato_english_utf16le_base64); + DATA_BLOB plato_english_output; + DATA_BLOB plato_english_output2; + + talloc_steal(tctx, plato_english_utf16le.data); + + iconv_handle =
get_iconv_testing_handle(tctx, "ISO-8859-1", "CP850", + lpcfg_parm_bool(tctx->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + torture_assert(tctx, iconv_handle, "getting iconv handle"); + +/* The UTF8, CP850 and ISO-8859-1 expectations all alias the one plato_english_ascii blob */ torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_DOS, + plato_english_utf8.data, plato_english_utf8.length, + (void *)&plato_english_output.data, &plato_english_output.length), + "conversion from UTF8 to (dos charset) ISO-8859-1"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_iso8859_1, "conversion from UTF8 to (dos charset) ISO-8859-1 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UNIX, + plato_english_utf8.data, plato_english_utf8.length, + (void *)&plato_english_output.data, &plato_english_output.length), + "conversion from UTF8 to (unix charset) CP850"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_cp850, "conversion from UTF8 to (unix charset) CP850 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UTF8, + plato_english_utf8.data, plato_english_utf8.length, + (void *)&plato_english_output.data, &plato_english_output.length), + "conversion from UTF8 to UTF8"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF8 to UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_DOS, + plato_english_utf16le.data, plato_english_utf16le.length, + (void *)&plato_english_output.data, &plato_english_output.length), + "conversion from UTF16LE to (dos charset) ISO-8859-1"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_iso8859_1, "conversion from UTF16LE to (dos charset) ISO-8859-1 incorrect"); + +/* Round trip: the dos-charset result just produced is converted back to UTF16LE */ torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_UTF16LE, + plato_english_output.data, plato_english_output.length, + (void
*)&plato_english_output2.data, &plato_english_output2.length), + "round trip conversion from (dos charset) ISO-8859-1 back to UTF16LE"); + torture_assert_data_blob_equal(tctx, plato_english_output2, plato_english_utf16le, "round trip conversion from (dos charset) ISO-8859-1 back to UTF16LE"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le.data, plato_english_utf16le.length, + (void *)&plato_english_output.data, &plato_english_output.length), + "conversion from UTF16LE to UTF8"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 incorrect"); + +/* Same conversion again with the non-allocating call, writing into the buffer returned by the previous call */ torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le.data, plato_english_utf16le.length, + (void *)plato_english_output.data, plato_english_output.length, + &plato_english_output.length), + "conversion from UTF16LE to UTF8"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 incorrect"); + +/* Limit the output to 5 bytes: the call must fail after writing exactly the first five characters */ plato_english_output.length = 5; + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le.data, plato_english_utf16le.length, + (void *)plato_english_output.data, plato_english_output.length, + &plato_english_output.length) == false, + "conversion from UTF16LE to UTF8 should fail due to short output"); + torture_assert_data_blob_equal(tctx, plato_english_output, data_blob_string_const("What "), "conversion from UTF16LE to UTF8 incorrect"); + torture_assert_int_equal(tctx, plato_english_output.length, 5, "short conversion failed"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UNIX, + plato_english_utf16le.data, plato_english_utf16le.length, + (void *)&plato_english_output.data, &plato_english_output.length), + "conversion from UTF16LE to (unix charset) CP850"); +
torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le.data, plato_english_utf16le.length, + (void *)&plato_english_output.data, &plato_english_output.length), + "conversion from UTF16LE to UTF8"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 incorrect"); + +/* NOTE(review): the three CH_DOS source comparisons below report a conversion from UTF16LE in their failure text */ torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_DOS, + plato_english_iso8859_1.data, plato_english_iso8859_1.length, + (void *)&plato_english_output.data, &plato_english_output.length), + "conversion from (dos charset) ISO-8859-1 to (dos charset) ISO-8859-1"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_iso8859_1, "conversion from UTF16LE to (dos charset) ISO-8859-1 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_UNIX, + plato_english_iso8859_1.data, plato_english_iso8859_1.length, + (void *)&plato_english_output.data, &plato_english_output.length), + "conversion from (dos charset) ISO-8859-1 to (unix charset) CP850"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_UTF8, + plato_english_iso8859_1.data, plato_english_iso8859_1.length, + (void *)&plato_english_output.data, &plato_english_output.length), + "conversion from (dos charset) ISO-8859-1 to UTF8"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_UTF16LE, + plato_english_iso8859_1.data, plato_english_iso8859_1.length, +
(void *)&plato_english_output.data, &plato_english_output.length), + "conversion from (dos charset) ISO-8859-1 to UTF16LE"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le, "conversion from (dos charset) ISO-8859-1 to UTF16LE"); + return true; +} + +/* Exercises convert_string_error_handle() between UTF8 and UTF16LE with a source length of -1, using terminated copies of the English test text */ static bool test_plato_english_minus_1_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB plato_english_utf8 = data_blob_string_const(plato_english_ascii); + DATA_BLOB plato_english_utf16le = base64_decode_data_blob(plato_english_utf16le_base64); + DATA_BLOB plato_english_output; + DATA_BLOB plato_english_utf8_terminated; + DATA_BLOB plato_english_utf16le_terminated; + + talloc_steal(tctx, plato_english_utf16le.data); + + iconv_handle = get_iconv_testing_handle(tctx, "ISO-8859-1", "CP850", + lpcfg_parm_bool(tctx->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + torture_assert(tctx, iconv_handle, "getting iconv handle"); + +/* Terminated copies: one zero byte appended for UTF8, two for UTF16LE */ plato_english_utf8_terminated = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 1); + memcpy(plato_english_utf8_terminated.data, plato_english_utf8.data, plato_english_utf8.length); + plato_english_utf8_terminated.data[plato_english_utf8.length] = '\0'; + + plato_english_utf16le_terminated = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 2); + memcpy(plato_english_utf16le_terminated.data, plato_english_utf16le.data, plato_english_utf16le.length); + plato_english_utf16le_terminated.data[plato_english_utf16le.length] = '\0'; + plato_english_utf16le_terminated.data[plato_english_utf16le.length + 1] = '\0'; + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 10); + +/* Ample output space: the result must equal the terminated UTF16LE blob */ torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF8 to UTF16LE null terminated"); +
torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated"); + +/* Output limits of the unterminated length, then one and two bytes less: each must fail with E2BIG */ torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf16le.length, &plato_english_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le, "conversion from UTF8 to UTF16LE null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf16le.length - 1, &plato_english_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf16le.length - 2, &plato_english_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + +/* Opposite direction, UTF16LE to UTF8, starting with ample output space */ plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8_terminated, "conversion from UTF16LE to UTF8 null
terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf8.length, &plato_english_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf8.length - 1, &plato_english_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf8.length - 2, &plato_english_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + /* Now null terminate the string early, the confirm we don't skip the NULL and convert any further */ + plato_english_utf8_terminated.data[3] = '\0'; + plato_english_utf8_terminated.length = 4; /* used for the comparison only */ + + plato_english_utf16le_terminated.data[6] = '\0'; + plato_english_utf16le_terminated.data[7] = '\0'; + plato_english_utf16le_terminated.length = 8; /* used for the comparison only */ + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, 
-1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early"); + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated early"); + + + /* Now NUL-terminate the strings particularly early, then confirm that the NUL is not skipped and nothing after it is converted */ + plato_english_utf8_terminated.data[1] = '\0'; + plato_english_utf8_terminated.length = 2; /* length adjusted for the comparison only */ + + plato_english_utf16le_terminated.data[2] = '\0'; + plato_english_utf16le_terminated.data[3] = '\0'; + plato_english_utf16le_terminated.length = 4; /* length adjusted for the comparison only */ + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated very early"); + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, +
plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated very early"); + + return true; +} + +/* Exercises convert_string_error_handle() between UTF8 and UTF16LE with a source length of -1, using terminated copies of the base64-decoded plato test text */ static bool test_plato_minus_1_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB plato_utf8 = base64_decode_data_blob(plato_utf8_base64); + DATA_BLOB plato_utf16le = base64_decode_data_blob(plato_utf16le_base64); + DATA_BLOB plato_output; + DATA_BLOB plato_utf8_terminated; + DATA_BLOB plato_utf16le_terminated; + + talloc_steal(tctx, plato_utf8.data); + talloc_steal(tctx, plato_utf16le.data); + + iconv_handle = get_iconv_testing_handle(tctx, "ISO-8859-1", "CP850", + lpcfg_parm_bool(tctx->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + torture_assert(tctx, iconv_handle, "getting iconv handle"); + +/* Terminated copies: one zero byte appended for UTF8, two for UTF16LE */ plato_utf8_terminated = data_blob_talloc(tctx, NULL, plato_utf8.length + 1); + memcpy(plato_utf8_terminated.data, plato_utf8.data, plato_utf8.length); + plato_utf8_terminated.data[plato_utf8.length] = '\0'; + + plato_utf16le_terminated = data_blob_talloc(tctx, NULL, plato_utf16le.length + 2); + memcpy(plato_utf16le_terminated.data, plato_utf16le.data, plato_utf16le.length); + plato_utf16le_terminated.data[plato_utf16le.length] = '\0'; + plato_utf16le_terminated.data[plato_utf16le.length + 1] = '\0'; + + plato_output = data_blob_talloc(tctx, NULL, plato_utf16le.length + 10); + +/* Ample output space: the result must equal the terminated UTF16LE blob */ torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_output.length, &plato_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated"); + +
/* Output limits of the unterminated length, then one and two bytes less: each must fail with E2BIG */ torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_utf16le.length, &plato_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le, "conversion from UTF8 to UTF16LE null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_utf16le.length - 1, &plato_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_utf16le.length - 2, &plato_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + +/* Opposite direction, UTF16LE to UTF8, starting with ample output space */ plato_output = data_blob_talloc(tctx, NULL, plato_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_output.length, &plato_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_utf8.length, &plato_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx,
E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_utf8.length - 1, &plato_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_utf8.length - 2, &plato_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + /* Now NUL-terminate the strings early, then confirm that the NUL is not skipped and nothing after it is converted. NOTE(review): offsets 5 (UTF8) and 4 (UTF16LE) presumably mark the same character boundary in the test text; confirm against the base64 data */ + plato_utf8_terminated.data[5] = '\0'; + plato_utf8_terminated.length = 6; /* length adjusted for the comparison only */ + + plato_utf16le_terminated.data[4] = '\0'; + plato_utf16le_terminated.data[5] = '\0'; + plato_utf16le_terminated.length = 6; /* length adjusted for the comparison only */ + + plato_output = data_blob_talloc(tctx, NULL, plato_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_output.length, &plato_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early"); + + plato_output = data_blob_talloc(tctx, NULL, plato_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data,
plato_output.length, &plato_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated early"); + + return true; +} + +static bool test_plato_cp850_utf8_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB plato_utf8 = base64_decode_data_blob(plato_utf8_base64); + DATA_BLOB plato_utf16le = base64_decode_data_blob(plato_utf16le_base64); + DATA_BLOB plato_output; + DATA_BLOB plato_output2; + + talloc_steal(tctx, plato_utf8.data); + talloc_steal(tctx, plato_utf16le.data); + + iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8", + lpcfg_parm_bool(tctx->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + torture_assert(tctx, iconv_handle, "creating iconv handle"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8.data, plato_utf8.length, + (void *)&plato_output.data, &plato_output.length), + "conversion of UTF8 ancient greek to UTF16 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le, "conversion from UTF8 to UTF16LE incorrect"); + + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)plato_utf8.data, + CH_UTF8, CH_UTF16LE), + plato_output.length / 2, + "checking strlen_m_ext of conversion of UTF8 to UTF16LE"); + + memset(plato_output.data, '\0', plato_output.length); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8.data, plato_utf8.length, + (void *)plato_output.data, plato_output.length, + &plato_output.length), + "conversion of UTF8 ancient greek to UTF16 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le, "conversion from UTF8 to UTF16LE incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_output.data, plato_output.length, 
+ (void *)&plato_output2.data, &plato_output2.length), + "conversion of UTF16 ancient greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_output2, plato_utf8, "conversion from UTF16LE to UTF8 incorrect"); + + /* Repeat the UTF16LE to UTF8 conversion into the zeroed, already allocated buffer */ + memset(plato_output2.data, '\0', plato_output2.length); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_output.data, plato_output.length, + (void *)plato_output2.data, plato_output2.length, &plato_output2.length), + "conversion of UTF16 ancient greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_output2, plato_utf8, "conversion from UTF16LE to UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UTF8, + plato_utf8.data, plato_utf8.length, + (void *)&plato_output.data, &plato_output.length), + "conversion of UTF8 to UTF8"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, + "conversion of UTF8 to UTF8"); + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)plato_utf8.data, + CH_UTF8, CH_UTF8), + plato_output.length, + "checking strlen_m_ext of conversion of UTF8 to UTF8"); + memset(plato_output.data, '\0', plato_output.length); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF8, + plato_utf8.data, plato_utf8.length, + (void *)plato_output.data, plato_output.length, + &plato_output.length), + "conversion of UTF8 to UTF8"); + /* Check the converted bytes as well, as test_plato() does for the same step */ + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, + "conversion of UTF8 to UTF8"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_DOS, + plato_utf8.data, plato_utf8.length, + (void *)&plato_output.data, &plato_output.length) == false, + "conversion of UTF8 ancient greek to DOS charset CP850 should fail"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UNIX, + plato_utf8.data, plato_utf8.length, + (void *)&plato_output.data, &plato_output.length), + "conversion of UTF8 ancient greek to unix charset UTF8 failed"); + 
torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF8 to (unix charset) UTF8 incorrect"); + + memset(plato_output.data, '\0', plato_output.length); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UNIX, + plato_utf8.data, plato_utf8.length, + (void *)plato_output.data, plato_output.length, + &plato_output.length), + "conversion of UTF8 ancient greek to unix charset UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF8 to (unix charset) UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UTF8, + plato_utf8.data, plato_utf8.length, + (void *)&plato_output.data, &plato_output.length), + "conversion of UTF8 ancient greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF8 to UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_DOS, + plato_utf16le.data, plato_utf16le.length, + (void *)&plato_output.data, &plato_output.length) == false, + "conversion of UTF16 ancient greek to DOS charset CP850 should fail"); + + /* Allocate enough space, if it were possible to do the conversion */ + plato_output = data_blob_talloc(tctx, NULL, plato_utf16le.length); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_DOS, + plato_utf16le.data, plato_utf16le.length, + (void *)plato_output.data, plato_output.length, + &plato_output.length) == false, + "conversion of UTF16 ancient greek to DOS charset CP850 should fail"); + torture_assert_errno_equal(tctx, EILSEQ, "conversion of UTF16 ancient greek to DOS charset CP850 should fail"); + + /* Allocate only enough space for a partial conversion */ + plato_output = data_blob_talloc(tctx, NULL, 9); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le.data, plato_utf16le.length, + 
(void *)plato_output.data, plato_output.length, + &plato_output.length) == false, + "conversion of UTF16 ancient greek to UTF8 should fail, not enough space"); + torture_assert_errno_equal(tctx, E2BIG, "conversion of UTF16 ancient greek to UTF8 should fail, not enough space"); + torture_assert_int_equal(tctx, plato_output.length, 8, + "conversion of UTF16 ancient greek to UTF8 should stop on multibyte boundary"); + + plato_output = data_blob_talloc(tctx, NULL, 2); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le.data, plato_utf16le.length, + (void *)plato_output.data, plato_output.length, + &plato_output.length) == false, + "conversion of UTF16 ancient greek to UTF8 should fail, not enough space"); + torture_assert_errno_equal(tctx, E2BIG, "conversion of UTF16 ancient greek to UTF8 should fail, not enough space"); + torture_assert_int_equal(tctx, plato_output.length, 0, + "conversion of UTF16 ancient greek to UTF8 should stop on multibyte boundary"); + + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UNIX, + plato_utf16le.data, plato_utf16le.length, + (void *)&plato_output.data, &plato_output.length), + "conversion of UTF16 ancient greek to unix charset UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to (unix charset) UTF8 incorrect"); + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le.data, plato_utf16le.length, + (void *)&plato_output.data, &plato_output.length), + "conversion of UTF16 ancient greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 incorrect"); + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le.data, plato_utf16le.length, + (void *)&plato_output.data, &plato_output.length), + "conversion of UTF16 ancient greek to 
UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_output.data, plato_output.length, + (void *)&plato_output2.data, &plato_output2.length), + "round trip conversion of UTF16 ancient greek to UTF8 and back again failed"); + torture_assert_data_blob_equal(tctx, plato_output2, plato_utf16le, + "round trip conversion of UTF16 ancient greek to UTF8 and back again failed"); + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)plato_output.data, + CH_UTF8, CH_UTF16LE), + plato_output2.length / 2, + "checking strlen_m_ext of round trip conversion of UTF16 ancient greek to UTF8 and back again"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UTF8, + plato_output.data, plato_output.length, + (void *)&plato_output2.data, &plato_output2.length), + "conversion of UTF8 to UTF8"); + torture_assert_data_blob_equal(tctx, plato_output2, plato_utf8, + "conversion of UTF8 to UTF8"); + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)plato_output.data, + CH_UTF8, CH_UTF8), + plato_output2.length, + "checking strlen_m_ext of conversion of UTF8 to UTF8"); + return true; +} + +/* Convert the latin-script Plato sample between UTF8, UTF16LE and the testing handle's DOS (CP850) and unix (UTF8) charsets; conversions to the DOS charset are expected to fail. */ +static bool test_plato_latin_cp850_utf8_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB plato_latin_utf8 = base64_decode_data_blob(plato_latin_utf8_base64); + DATA_BLOB plato_latin_utf16le = base64_decode_data_blob(plato_latin_utf16le_base64); + DATA_BLOB plato_latin_output; + DATA_BLOB plato_latin_output2; + + talloc_steal(tctx, plato_latin_utf8.data); + talloc_steal(tctx, plato_latin_utf16le.data); + + iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8", + lpcfg_parm_bool(tctx->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + torture_assert(tctx, iconv_handle, 
"creating iconv handle"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_DOS, + plato_latin_utf8.data, plato_latin_utf8.length, + (void *)&plato_latin_output.data, &plato_latin_output.length) == false, + "conversion of UTF8 latin charset greek to DOS charset CP850 should fail"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UNIX, + plato_latin_utf8.data, plato_latin_utf8.length, + (void *)&plato_latin_output.data, &plato_latin_output.length), + "conversion of UTF8 latin charset greek to unix charset UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF8 to (unix charset) UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UTF8, + plato_latin_utf8.data, plato_latin_utf8.length, + (void *)&plato_latin_output.data, &plato_latin_output.length), + "conversion of UTF8 latin charset greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF8 to UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_DOS, + plato_latin_utf16le.data, plato_latin_utf16le.length, + (void *)&plato_latin_output.data, &plato_latin_output.length) == false, + "conversion of UTF16 latin charset greek to DOS charset CP850 should fail"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UNIX, + plato_latin_utf16le.data, plato_latin_utf16le.length, + (void *)&plato_latin_output.data, &plato_latin_output.length), + "conversion of UTF16 latin charset greek to unix charset UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF16LE to (unix charset) UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UTF8, + 
plato_latin_utf16le.data, plato_latin_utf16le.length, + (void *)&plato_latin_output.data, &plato_latin_output.length), + "conversion of UTF16 latin charset greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF16LE to UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_latin_output.data, plato_latin_output.length, + (void *)&plato_latin_output2.data, &plato_latin_output2.length), + "round trip conversion of UTF16 latin charset greek to UTF8 and back again failed"); + torture_assert_data_blob_equal(tctx, plato_latin_output2, plato_latin_utf16le, + "round trip conversion of UTF16 latin charset greek to UTF8 and back again failed"); + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)plato_latin_output.data, + CH_UTF8, CH_UTF16LE), + plato_latin_output2.length / 2, + "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to UTF8 and back again"); + return true; +} + +static bool test_utf8_nfc_to_nfd_overflow(struct torture_context *tctx) +{ + smb_iconv_t ic; + DATA_BLOB utf8_nfc_blob; + DATA_BLOB utf8_nfd_blob; + DATA_BLOB src_blob; + DATA_BLOB blob; + size_t nconv; + const char *src = NULL; + char *dst = NULL; + size_t dst_left; + size_t srclen; + bool ret = true; + + ic = smb_iconv_open("UTF8-NFD", "UTF8-NFC"); + torture_assert_goto(tctx, ic != (smb_iconv_t)-1, ret, done, + "creating iconv handle\n"); + + utf8_nfc_blob = base64_decode_data_blob_talloc(tctx, utf8_nfc_base64); + torture_assert_not_null_goto(tctx, utf8_nfc_blob.data, ret, done, + "OOM\n"); + + utf8_nfd_blob = base64_decode_data_blob_talloc(tctx, utf8_nfd_base64); + torture_assert_not_null_goto(tctx, utf8_nfd_blob.data, ret, done, + "OOM\n"); + + blob = data_blob_talloc_zero(tctx, 255); + torture_assert_not_null_goto(tctx, blob.data, ret, done, "OOM\n"); + + /* + * Unfortunately the current implementation that 
performs the conversion + * (using libicu) returns EINVAL if the result buffer is too small, not + * E2BIG like iconv(). + */ + + src = "foo"; + srclen = 3; + dst = (char *)blob.data; + dst_left = 0; + nconv = smb_iconv(ic, + &src, + &srclen, + &dst, + &dst_left); + torture_assert_int_equal_goto(tctx, nconv, -1, ret, done, + "smb_iconv failed\n"); + torture_assert_errno_equal_goto(tctx, EINVAL, ret, done, + "Wrong errno\n"); + + src = "foo"; + srclen = 3; + dst = (char *)blob.data; + dst_left = 1; + nconv = smb_iconv(ic, + &src, + &srclen, + &dst, + &dst_left); + torture_assert_int_equal_goto(tctx, nconv, -1, ret, done, + "smb_iconv failed\n"); + torture_assert_errno_equal_goto(tctx, EINVAL, ret, done, + "Wrong errno\n"); + + src = "foo"; + srclen = 3; + dst = (char *)blob.data; + dst_left = 2; + nconv = smb_iconv(ic, + &src, + &srclen, + &dst, + &dst_left); + torture_assert_int_equal_goto(tctx, nconv, -1, ret, done, + "smb_iconv failed\n"); + torture_assert_errno_equal_goto(tctx, EINVAL, ret, done, + "Wrong errno\n"); + + /* An output buffer of exactly the source size must succeed */ + src_blob = data_blob_const("foo", 3); + src = (const char *)src_blob.data; + srclen = src_blob.length; + dst = (char *)blob.data; + dst_left = 3; + nconv = smb_iconv(ic, + &src, + &srclen, + &dst, + &dst_left); + torture_assert_int_equal_goto(tctx, nconv, 3, ret, done, + "smb_iconv failed\n"); + + blob.length = nconv; + torture_assert_data_blob_equal(tctx, + src_blob, + blob, + "Conversion failed\n"); + + /* Same again, including the terminating NUL of the literal */ + src_blob = data_blob_const("foo", 4); + src = (const char *)src_blob.data; + srclen = src_blob.length; + dst = (char *)blob.data; + dst_left = 4; + nconv = smb_iconv(ic, + &src, + &srclen, + &dst, + &dst_left); + torture_assert_int_equal_goto(tctx, nconv, 4, ret, done, + "smb_iconv failed\n"); + + blob.length = nconv; + torture_assert_data_blob_equal(tctx, + src_blob, + blob, + "Conversion failed\n"); + +done: + /* Release the handle from smb_iconv_open(); it is (smb_iconv_t)-1 only when the open itself failed */ + if (ic != (smb_iconv_t)-1) { + smb_iconv_close(ic); + } + return ret; +} + +static bool test_utf8_nfc_to_nfd(struct torture_context *tctx) +{ + smb_iconv_t ic; + DATA_BLOB utf8_nfc_blob; + 
DATA_BLOB utf8_nfd_blob; + DATA_BLOB blob; + size_t nconv; + const char *src = NULL; + char *dst = NULL; + size_t dst_left; + size_t srclen; + bool ret = true; + + ic = smb_iconv_open("UTF8-NFD", "UTF8-NFC"); + torture_assert_goto(tctx, ic != (smb_iconv_t)-1, ret, done, + "creating iconv handle\n"); + + utf8_nfc_blob = base64_decode_data_blob_talloc(tctx, utf8_nfc_base64); + torture_assert_not_null_goto(tctx, utf8_nfc_blob.data, ret, done, + "OOM\n"); + + utf8_nfd_blob = base64_decode_data_blob_talloc(tctx, utf8_nfd_base64); + torture_assert_not_null_goto(tctx, utf8_nfd_blob.data, ret, done, + "OOM\n"); + + blob = data_blob_talloc_zero(tctx, 255); + torture_assert_not_null_goto(tctx, blob.data, ret, done, "OOM\n"); + + dst = (char *)blob.data; + dst_left = blob.length; + src = (const char *)utf8_nfc_blob.data; + srclen = strlen(src); + + nconv = smb_iconv(ic, + &src, + &srclen, + &dst, + &dst_left); + torture_assert_goto(tctx, nconv != (size_t)-1, ret, done, + "smb_iconv failed\n"); + + blob.length = nconv + 1; /* +1 for the trailing zero */ + torture_assert_data_blob_equal(tctx, + blob, + utf8_nfd_blob, + "Conversion failed\n"); + +done: + /* Release the handle from smb_iconv_open(); it is (smb_iconv_t)-1 only when the open itself failed */ + if (ic != (smb_iconv_t)-1) { + smb_iconv_close(ic); + } + return ret; +} + +/* Convert the UTF8-NFD sample to UTF8-NFC with smb_iconv() and compare against the NFC sample. */ +static bool test_utf8_nfd_to_nfc(struct torture_context *tctx) +{ + smb_iconv_t ic; + DATA_BLOB utf8_nfc_blob; + DATA_BLOB utf8_nfd_blob; + DATA_BLOB blob; + size_t nconv; + const char *src = NULL; + char *dst = NULL; + size_t dst_left; + size_t srclen; + bool ret = true; + + ic = smb_iconv_open("UTF8-NFC", "UTF8-NFD"); + torture_assert_goto(tctx, ic != (smb_iconv_t)-1, ret, done, + "creating iconv handle\n"); + + utf8_nfc_blob = base64_decode_data_blob_talloc(tctx, utf8_nfc_base64); + torture_assert_not_null_goto(tctx, utf8_nfc_blob.data, ret, done, + "OOM\n"); + + utf8_nfd_blob = base64_decode_data_blob_talloc(tctx, utf8_nfd_base64); + torture_assert_not_null_goto(tctx, utf8_nfd_blob.data, ret, done, + "OOM\n"); + + blob = data_blob_talloc_zero(tctx, 255); + torture_assert_not_null_goto(tctx, blob.data, ret, 
done, "OOM\n"); + + dst = (char *)blob.data; + dst_left = blob.length; + src = (const char *)utf8_nfd_blob.data; + srclen = strlen(src); + + nconv = smb_iconv(ic, + &src, + &srclen, + &dst, + &dst_left); + torture_assert_goto(tctx, nconv != (size_t)-1, ret, done, + "smb_iconv failed\n"); + + blob.length = nconv + 1; /* +1 for the trailing zero */ + torture_assert_data_blob_equal(tctx, + blob, + utf8_nfc_blob, + "Conversion failed\n"); + +done: + /* Release the handle from smb_iconv_open(); it is (smb_iconv_t)-1 only when the open itself failed */ + if (ic != (smb_iconv_t)-1) { + smb_iconv_close(ic); + } + return ret; +} + +/* Exercise the upper/lower case helpers and case-insensitive comparisons on the "GD" sample name using an ASCII/UTF8 testing handle. */ +static bool test_gd_case_utf8_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB gd_utf8 = base64_decode_data_blob(gd_utf8_base64); + DATA_BLOB gd_utf8_upper = base64_decode_data_blob(gd_utf8_upper_base64); + DATA_BLOB gd_utf8_lower = base64_decode_data_blob(gd_utf8_lower_base64); + char *gd_lower, *gd_upper; + talloc_steal(tctx, gd_utf8.data); + + iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8", + lpcfg_parm_bool(tctx->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + torture_assert(tctx, iconv_handle, "getting utf8 iconv handle"); + + torture_assert(tctx, + strhasupper_handle(iconv_handle, (const char *)gd_utf8.data), + "GD's name has an upper case character"); + torture_assert(tctx, + strhaslower_handle(iconv_handle, (const char *)gd_utf8.data), + "GD's name has an lower case character"); + gd_lower = strlower_talloc_handle(iconv_handle, tctx, (const char *)gd_utf8.data); + torture_assert(tctx, gd_lower, "failed to convert GD's name into lower case"); + torture_assert_data_blob_equal(tctx, data_blob_string_const(gd_lower), gd_utf8_lower, + "convert GD's name into lower case"); + gd_upper = strupper_talloc_n_handle(iconv_handle, tctx, (const char *)gd_utf8.data, gd_utf8.length); + /* Check the pointer just produced (gd_upper), not gd_lower, before it is dereferenced below */ + torture_assert(tctx, gd_upper, "failed to convert GD's name into upper case"); + torture_assert_data_blob_equal(tctx, data_blob_string_const(gd_upper), gd_utf8_upper, + "convert GD's name into upper case"); + + torture_assert(tctx, + strhasupper_handle(iconv_handle, 
gd_upper), + "upper case name has an upper case character"); + torture_assert(tctx, + strhaslower_handle(iconv_handle, gd_lower), + "lower case name has an lower case character"); + torture_assert(tctx, + strhasupper_handle(iconv_handle, gd_lower) == false, + "lower case name has no upper case character"); + torture_assert(tctx, + strhaslower_handle(iconv_handle, gd_upper) == false, + "upper case name has no lower case character"); + + torture_assert(tctx, strcasecmp_m_handle(iconv_handle, (const char *)gd_utf8.data, + gd_upper) == 0, + "case insensitive comparison orig/upper"); + torture_assert(tctx, strcasecmp_m_handle(iconv_handle, (const char *)gd_utf8.data, + gd_lower) == 0, + "case insensitive comparison orig/lower"); + torture_assert(tctx, strcasecmp_m_handle(iconv_handle, gd_upper, + gd_lower) == 0, + "case insensitive comparison upper/lower"); + + /* This string isn't different in length upper/lower */ + torture_assert(tctx, strncasecmp_m_handle(iconv_handle, (const char *)gd_utf8.data, + gd_upper, gd_utf8.length) == 0, + "case insensitive comparison orig/upper"); + torture_assert(tctx, strncasecmp_m_handle(iconv_handle, (const char *)gd_utf8.data, + gd_lower, gd_utf8.length) == 0, + "case insensitive comparison orig/lower"); + torture_assert(tctx, strncasecmp_m_handle(iconv_handle, gd_upper, + gd_lower, gd_utf8.length) == 0, + "case insensitive comparison upper/lower"); + + data_blob_free(&gd_utf8); + data_blob_free(&gd_utf8_upper); + data_blob_free(&gd_utf8_lower); + + return true; +} + +static bool test_gd_case_cp850_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB gd_cp850 = base64_decode_data_blob(gd_cp850_base64); + DATA_BLOB gd_cp850_upper = base64_decode_data_blob(gd_cp850_upper_base64); + DATA_BLOB gd_cp850_lower = base64_decode_data_blob(gd_cp850_lower_base64); + char *gd_lower, *gd_upper; + talloc_steal(tctx, gd_cp850.data); + + iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "CP850", + 
lpcfg_parm_bool(tctx->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + torture_assert(tctx, iconv_handle, "getting cp850 iconv handle"); + + torture_assert(tctx, + strhasupper_handle(iconv_handle, (const char *)gd_cp850.data), + "GD's name has an upper case character"); + torture_assert(tctx, + strhaslower_handle(iconv_handle, (const char *)gd_cp850.data), + "GD's name has an lower case character"); + gd_lower = strlower_talloc_handle(iconv_handle, tctx, (const char *)gd_cp850.data); + torture_assert(tctx, gd_lower, "failed to convert GD's name into lower case"); + torture_assert_data_blob_equal(tctx, data_blob_string_const(gd_lower), gd_cp850_lower, + "convert GD's name into lower case"); + gd_upper = strupper_talloc_n_handle(iconv_handle, tctx, (const char *)gd_cp850.data, gd_cp850.length); + /* Check the pointer just produced (gd_upper), not gd_lower, before it is dereferenced below */ + torture_assert(tctx, gd_upper, "failed to convert GD's name into upper case"); + torture_assert_data_blob_equal(tctx, data_blob_string_const(gd_upper), gd_cp850_upper, + "convert GD's name into upper case"); + + torture_assert(tctx, + strhasupper_handle(iconv_handle, gd_upper), + "upper case name has an upper case character"); + torture_assert(tctx, + strhaslower_handle(iconv_handle, gd_lower), + "lower case name has an lower case character"); + torture_assert(tctx, + strhasupper_handle(iconv_handle, gd_lower) == false, + "lower case name has no upper case character"); + torture_assert(tctx, + strhaslower_handle(iconv_handle, gd_upper) == false, + "upper case name has no lower case character"); + + torture_assert(tctx, strcasecmp_m_handle(iconv_handle, (const char *)gd_cp850.data, + gd_upper) == 0, + "case insensitive comparison orig/upper"); + torture_assert(tctx, strcasecmp_m_handle(iconv_handle, (const char *)gd_cp850.data, + gd_lower) == 0, + "case insensitive comparison orig/lower"); + torture_assert(tctx, strcasecmp_m_handle(iconv_handle, gd_upper, + gd_lower) == 0, + "case insensitive comparison upper/lower"); + + /* This string isn't different in length 
upper/lower */ + torture_assert(tctx, strncasecmp_m_handle(iconv_handle, (const char *)gd_cp850.data, + gd_upper, gd_cp850.length) == 0, + "case insensitive comparison orig/upper"); + torture_assert(tctx, strncasecmp_m_handle(iconv_handle, (const char *)gd_cp850.data, + gd_lower, gd_cp850.length) == 0, + "case insensitive comparison orig/lower"); + torture_assert(tctx, strncasecmp_m_handle(iconv_handle, gd_upper, + gd_lower, gd_cp850.length) == 0, + "case insensitive comparison upper/lower"); + + data_blob_free(&gd_cp850); + data_blob_free(&gd_cp850_upper); + data_blob_free(&gd_cp850_lower); + + return true; +} + +/* Exercise the upper/lower case helpers and case-insensitive comparisons on the Plato sample text using an ASCII/UTF8 testing handle. */ +static bool test_plato_case_utf8_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB plato_utf8 = base64_decode_data_blob(plato_utf8_base64); + char *plato_lower, *plato_upper; + talloc_steal(tctx, plato_utf8.data); + + iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8", + lpcfg_parm_bool(tctx->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + torture_assert(tctx, iconv_handle, "getting utf8 iconv handle"); + + torture_assert(tctx, + strhasupper_handle(iconv_handle, (const char *)plato_utf8.data), + "PLATO's apology has an upper case character"); + torture_assert(tctx, + strhaslower_handle(iconv_handle, (const char *)plato_utf8.data), + "PLATO's apology has an lower case character"); + plato_lower = strlower_talloc_handle(iconv_handle, tctx, (const char *)plato_utf8.data); + torture_assert(tctx, plato_lower, "failed to convert PLATO's apology into lower case"); + plato_upper = strupper_talloc_n_handle(iconv_handle, tctx, (const char *)plato_utf8.data, plato_utf8.length); + /* Check the pointer just produced (plato_upper), not plato_lower, before it is used below */ + torture_assert(tctx, plato_upper, "failed to convert PLATO's apology into upper case"); + + torture_assert(tctx, + strhasupper_handle(iconv_handle, plato_upper), + "upper case string has an upper case character"); + torture_assert(tctx, + strhaslower_handle(iconv_handle, plato_lower), + "lower case string has an lower case 
character"); + torture_assert(tctx, + strhasupper_handle(iconv_handle, plato_lower) == false, + "lower case string has no upper case character"); + torture_assert(tctx, + strhaslower_handle(iconv_handle, plato_upper) == false, + "upper case string has no lower case character"); + + torture_assert(tctx, strcasecmp_m_handle(iconv_handle, (const char *)plato_utf8.data, + plato_upper) == 0, + "case insensitive comparison orig/upper"); + torture_assert(tctx, strcasecmp_m_handle(iconv_handle, (const char *)plato_utf8.data, + plato_lower) == 0, + "case insensitive comparison orig/lower"); + torture_assert(tctx, strcasecmp_m_handle(iconv_handle, plato_upper, + plato_lower) == 0, + "case insensitive comparison upper/lower"); + return true; +} + +static bool test_gd(struct torture_context *tctx) +{ + DATA_BLOB gd_utf8 = base64_decode_data_blob(gd_utf8_base64); + DATA_BLOB gd_cp850 = base64_decode_data_blob(gd_cp850_base64); + DATA_BLOB gd_iso8859_1 = base64_decode_data_blob(gd_iso8859_1_base64); + DATA_BLOB gd_utf16le = base64_decode_data_blob(gd_utf16le_base64); + DATA_BLOB gd_output; + size_t saved_len; + + talloc_steal(tctx, gd_utf8.data); + talloc_steal(tctx, gd_cp850.data); + talloc_steal(tctx, gd_iso8859_1.data); + talloc_steal(tctx, gd_utf16le.data); + + torture_assert(tctx, convert_string_talloc(tctx, CH_UTF8, CH_UTF8, + gd_utf8.data, gd_utf8.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from UTF8 to utf8 charset"); + saved_len = gd_output.length; + + torture_assert(tctx, convert_string_error(CH_UTF8, CH_UTF8, + gd_utf8.data, gd_utf8.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length), + "conversion from UTF8 to utf8 charset"); + + /* Short output handling confirmation */ + gd_output.length = 1; + torture_assert(tctx, convert_string_error(CH_UTF8, CH_UTF8, + gd_utf8.data, gd_utf8.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF8 to any utf8 charset should fail due to 
too short"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to utf8 charset should fail E2BIG"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); + torture_assert_data_blob_equal(tctx, gd_output, data_blob_string_const("G"), "conversion from UTF8 to utf8 charset incorrect"); + +#if 0 /* This currently fails as we just copy like-for-like character conversions */ + /* Short output handling confirmation: with a 2 byte output buffer the call is expected to fail with E2BIG after producing 1 byte */ + gd_output.length = 2; + torture_assert(tctx, convert_string_error(CH_UTF8, CH_UTF8, + gd_utf8.data, gd_utf8.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF8 to utf8 charset should fail due to too short"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to utf8 charset should fail E2BIG"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); + + /* Short input handling confirmation: only the first 2 source bytes are supplied. NOTE(review): the check expects EILSEQ while its message says EINVAL - confirm which is intended before enabling this block */ + gd_output.length = saved_len; + torture_assert(tctx, convert_string_error(CH_UTF8, CH_UTF8, + gd_utf8.data, 2, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF8 to UTF8 should fail due to too short"); + torture_assert_errno_equal(tctx, EILSEQ, "conversion from short UTF8 to UTF8 should fail EINVAL"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); +#endif + + /* Short output handling confirmation: UTF16LE source into a 1 byte output buffer, expecting E2BIG and a single byte "G" */ + gd_output.length = 1; + torture_assert(tctx, convert_string_error(CH_UTF16LE, CH_UTF8, + gd_utf16le.data, gd_utf16le.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF16 to UTF8 should fail due to too short"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16 to UTF8 should fail E2BIG"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); + torture_assert_data_blob_equal(tctx, gd_output, data_blob_string_const("G"), 
"conversion from UTF16 to UTF8 incorrect"); + + /* Short output handling confirmation */ + gd_output.length = 3; + torture_assert(tctx, convert_string_error(CH_UTF16LE, CH_UTF8, + gd_utf16le.data, gd_utf16le.length, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF16 to UTF8 should fail due to too short"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16 to UTF8 should fail E2BIG"); + torture_assert_int_equal(tctx, gd_output.length, 3, "Should get 3 bytes output for UTF8"); + + /* Short input handling confirmation */ + gd_output.length = saved_len; + torture_assert(tctx, convert_string_error(CH_UTF16LE, CH_UTF8, + gd_utf16le.data, 3, + (void *)gd_output.data, gd_output.length, + &gd_output.length) == false, + "conversion from UTF16 to UTF8 should fail due to too short"); + torture_assert_errno_equal(tctx, EINVAL, "conversion from short UTF16 to UTF8 should fail EINVAL"); + torture_assert_int_equal(tctx, gd_output.length, 1, "Should only get 1 char of output"); + + return true; +} + +static bool test_plato(struct torture_context *tctx) +{ + DATA_BLOB plato_utf8 = base64_decode_data_blob(plato_utf8_base64); + DATA_BLOB plato_utf16le = base64_decode_data_blob(plato_utf16le_base64); + DATA_BLOB plato_output; + DATA_BLOB plato_output2; + + talloc_steal(tctx, plato_utf8.data); + talloc_steal(tctx, plato_utf16le.data); + + torture_assert(tctx, convert_string_talloc(tctx, + CH_UTF8, CH_UTF16LE, + plato_utf8.data, plato_utf8.length, + (void *)&plato_output.data, &plato_output.length), + "conversion of UTF8 ancient greek to UTF16 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le, "conversion from UTF8 to UTF16LE incorrect"); + + torture_assert_int_equal(tctx, + strlen_m_ext((const char *)plato_utf8.data, + CH_UTF8, CH_UTF16LE), + plato_output.length / 2, + "checking strlen_m_ext of conversion of UTF8 to UTF16LE"); + + memset(plato_output.data, '\0', plato_output.length); + 
torture_assert(tctx, convert_string_error(CH_UTF8, CH_UTF16LE, + plato_utf8.data, plato_utf8.length, + (void *)plato_output.data, plato_output.length, + &plato_output.length), + "conversion of UTF8 ancient greek to UTF16 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le, "conversion from UTF8 to UTF16LE incorrect"); + + /* Convert the UTF16LE result back to UTF8; the failure messages name the direction actually tested */ + torture_assert(tctx, convert_string_talloc(tctx, + CH_UTF16LE, CH_UTF8, + plato_output.data, plato_output.length, + (void *)&plato_output2.data, &plato_output2.length), + "conversion of UTF16 ancient greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_output2, plato_utf8, "conversion from UTF16LE to UTF8 incorrect"); + + memset(plato_output2.data, '\0', plato_output2.length); + torture_assert(tctx, convert_string_error(CH_UTF16LE, CH_UTF8, + plato_output.data, plato_output.length, + (void *)plato_output2.data, plato_output2.length, &plato_output2.length), + "conversion of UTF16 ancient greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_output2, plato_utf8, "conversion from UTF16LE to UTF8 incorrect"); + + torture_assert(tctx, convert_string_talloc(tctx, + CH_UTF8, CH_UTF8, + plato_utf8.data, plato_utf8.length, + (void *)&plato_output.data, &plato_output.length), + "conversion of UTF8 to UTF8"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, + "conversion of UTF8 to UTF8"); + torture_assert_int_equal(tctx, + strlen_m_ext((const char *)plato_utf8.data, + CH_UTF8, CH_UTF8), + plato_output.length, + "checking strlen_m_ext of conversion of UTF8 to UTF8"); + memset(plato_output.data, '\0', plato_output.length); + torture_assert(tctx, convert_string_error(CH_UTF8, CH_UTF8, + plato_utf8.data, plato_utf8.length, + (void *)plato_output.data, plato_output.length, + &plato_output.length), + "conversion of UTF8 to UTF8"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, + "conversion of UTF8 to UTF8"); + + memset(plato_output.data, '\0', plato_output.length); + 
torture_assert(tctx, convert_string_error(CH_UTF8, CH_DOS, + plato_utf8.data, plato_utf8.length, + (void *)plato_output.data, plato_output.length, + &plato_output.length) == false, + "conversion of UTF8 to any dos charset should fail"); + torture_assert_errno_equal(tctx, EILSEQ, "conversion of UTF16 ancient greek to any DOS charset should fail EILSEQ"); + + torture_assert(tctx, convert_string_talloc(tctx, + CH_UTF8, CH_DOS, + plato_utf8.data, plato_utf8.length, + (void *)&plato_output.data, &plato_output.length) == false, + "conversion of UTF8 ancient greek to any DOS charset should fail"); + + /* Allocate only enough space for a partial conversion */ + plato_output = data_blob_talloc(tctx, NULL, 9); + torture_assert(tctx, convert_string_error(CH_UTF16LE, CH_UTF8, + plato_utf16le.data, plato_utf16le.length, + (void *)plato_output.data, plato_output.length, + &plato_output.length) == false, + "conversion of UTF16 ancient greek to UTF8 should fail, not enough space"); + torture_assert_errno_equal(tctx, E2BIG, "conversion of UTF16 ancient greek to UTF8 should fail, not enough space"); + torture_assert_int_equal(tctx, plato_output.length, 8, + "conversion of UTF16 ancient greek to UTF8 should stop on multibyte boundary"); + + plato_output = data_blob_talloc(tctx, NULL, 2); + torture_assert(tctx, convert_string_error(CH_UTF16LE, CH_UTF8, + plato_utf16le.data, plato_utf16le.length, + (void *)plato_output.data, plato_output.length, + &plato_output.length) == false, + "conversion of UTF16 ancient greek to UTF8 should fail, not enough space"); + torture_assert_errno_equal(tctx, E2BIG, "conversion of UTF16 ancient greek to UTF8 should fail, not enough space"); + torture_assert_int_equal(tctx, plato_output.length, 0, + "conversion of UTF16 ancient greek to UTF8 should stop on multibyte boundary"); + + + return true; +} + + + +static bool test_short_strings(struct torture_context *tctx) +{ + char zeros[6] = {0}; + char s[6] = {'s'}; + bool ok; + char *out; + size_t out_len; + 
+ ok = convert_string_talloc(tctx, + CH_UTF8, CH_UTF16LE, + zeros, 0, + &out, &out_len); + torture_assert(tctx, ok, "{\"\", 0} to utf16 failed"); + torture_assert(tctx, out_len == 2, "{\"\", 0} length is two"); + torture_assert(tctx, out[0] == 0 && out[1] == 0, "{\"\", 0} utf16 is zero"); + TALLOC_FREE(out); + + ok = convert_string_talloc(tctx, + CH_UTF8, CH_UTF16LE, + zeros, 1, + &out, &out_len); + torture_assert(tctx, ok, "{\"\\0\", 1} to utf16 failed"); + torture_assert(tctx, out_len == 2, "{\"\\0\", 1} length is two"); + torture_assert(tctx, out[0] == 0 && out[1] == 0, "{\"\\0\", 1} utf16 is zero"); + TALLOC_FREE(out); + + ok = convert_string_talloc(tctx, + CH_UTF8, CH_UTF16LE, + zeros, 2, + &out, &out_len); + torture_assert(tctx, ok, "{\"\\0\\0\", 2} to utf16 failed"); + torture_assert(tctx, out_len == 4, "{\"\\0\\0\", 2} length is four"); + torture_assert(tctx, out[0] == 0 && out[1] == 0, "{\"\\0\\0\", 2} utf16 is zero"); + TALLOC_FREE(out); + + ok = convert_string_talloc(tctx, + CH_UTF8, CH_UTF16LE, + s, 0, + &out, &out_len); + torture_assert(tctx, ok, "{\"s\", 0} to utf16 failed"); + torture_assert(tctx, out_len == 2, "{\"s\", 0} length is two"); + torture_assert(tctx, out[0] == 0 && out[1] == 0, + "{\"s\", 0} utf16 is zero"); + TALLOC_FREE(out); + + ok = convert_string_talloc(tctx, + CH_UTF8, CH_UTF16LE, + s, 1, + &out, &out_len); + torture_assert(tctx, ok, "{\"s\", 1} to utf16 failed"); + torture_assert(tctx, out_len == 2, "{\"s\", 1} length is two"); + torture_assert(tctx, out[0] == 's' && out[1] == 0, + "{\"s\", 1} utf16 is s"); + TALLOC_FREE(out); + + ok = convert_string_talloc(tctx, + CH_UTF8, CH_UTF16LE, + s, 2, + &out, &out_len); + torture_assert(tctx, ok, "{\"s\\0\", 2} to utf16 failed"); + torture_assert(tctx, out_len == 4, "{\"s\\0\", 2} length is four"); + torture_assert(tctx, out[0] == 's' && out[1] == 0, + "{\"s\\0\", 0} utf16 is s"); + TALLOC_FREE(out); + + + /* going to utf8 */ + ok = convert_string_talloc(tctx, + CH_UTF16LE, CH_UTF8, + 
zeros, 0, + &out, &out_len); + torture_assert(tctx, ok, "{\"\", 0} to utf8 failed"); + torture_assert(tctx, out_len == 1, "{\"\", 0} length is one"); + torture_assert(tctx, out[0] == 0, "{\"\", 0} utf8[0] is zero"); + TALLOC_FREE(out); + + ok = convert_string_talloc(tctx, + CH_UTF16LE, CH_UTF8, + zeros, 2, + &out, &out_len); + torture_assert(tctx, ok, "{\"\\0\", 1} to utf8 failed"); + torture_assert(tctx, out_len == 1, "{\"\\0\", 1} length is one"); + torture_assert(tctx, out[0] == 0 && out[1] == 0, + "{\"\\0\", 1} utf8 is zero"); + TALLOC_FREE(out); + + ok = convert_string_talloc(tctx, + CH_UTF16LE, CH_UTF8, + zeros, 4, + &out, &out_len); + torture_assert(tctx, ok, "{\"\\0\\0\\0\\0\", 4} to utf8 failed"); + torture_assert(tctx, out_len == 2, "{\"\\0\\0\\0\\0\", 4} length is two"); + torture_assert(tctx, out[0] == 0 && out[1] == 0, + "{\"\\0\\0\\0\\0\", 4} utf8 is zero"); + TALLOC_FREE(out); + + ok = convert_string_talloc(tctx, + CH_UTF16LE, CH_UTF8, + s, 0, + &out, &out_len); + torture_assert(tctx, ok, "{\"s\", 0} to utf8 failed"); + torture_assert(tctx, out_len == 1, "{\"s\", 0} length is one"); + torture_assert(tctx, out[0] == 0, "{\"s\", 0} utf8 is zero"); + TALLOC_FREE(out); + + ok = convert_string_talloc(tctx, + CH_UTF16LE, CH_UTF8, + s, 2, + &out, &out_len); + torture_assert(tctx, ok, "{\"s\\0\", 2} to utf8 failed"); + torture_assert(tctx, out_len == 1, "{\"s\\0\", 2} length is one"); + torture_assert(tctx, out[0] == 's' && out[1] == 0, + "{\"s\\0\", 2} utf8 is s"); + TALLOC_FREE(out); + + + ok = convert_string_talloc(tctx, + CH_UTF16LE, CH_UTF8, + s, 4, + &out, &out_len); + torture_assert(tctx, ok, "{\"s\\0\\0\\0\", 4} utf8 failed"); + torture_assert(tctx, out_len == 2, "\"s\\0\\0\\0\", 4} utf8 length is two"); + torture_assert(tctx, out[0] == 's' && out[1] == 0, + "{\"s\\0\\0\\0\", 4} utf8 is s"); + TALLOC_FREE(out); + + /* odd numbers of bytes from UTF-16 should fail */ + ok = convert_string_talloc(tctx, + CH_UTF16LE, CH_UTF8, + s, 1, + &out, &out_len); + 
torture_assert(tctx, ! ok, "{\"s\", 1} to utf8 should have failed"); + + ok = convert_string_talloc(tctx, + CH_UTF16LE, CH_UTF8, + s, 3, + &out, &out_len); + torture_assert(tctx, ! ok, "{\"s\\0\\0\", 3} to utf8 should have failed"); + + ok = convert_string_talloc(tctx, + CH_UTF16LE, CH_UTF8, + zeros, 1, + &out, &out_len); + torture_assert(tctx, ! ok, + "{\"\\0\", 1} to utf8 should have failed"); + + ok = convert_string_talloc(tctx, + CH_UTF16LE, CH_UTF8, + zeros, 5, + &out, &out_len); + torture_assert(tctx, ! ok, + "{\"\\0\\0\\0\\0\", 5} to utf8 should have failed"); + + return true; +} + + +static bool test_plato_latin(struct torture_context *tctx) +{ + DATA_BLOB plato_latin_utf8 = base64_decode_data_blob(plato_latin_utf8_base64); + DATA_BLOB plato_latin_utf16le = base64_decode_data_blob(plato_latin_utf16le_base64); + DATA_BLOB plato_latin_output; + + talloc_steal(tctx, plato_latin_utf8.data); + talloc_steal(tctx, plato_latin_utf16le.data); + + torture_assert(tctx, convert_string_talloc(tctx, + CH_UTF16LE, CH_UTF8, + plato_latin_utf16le.data, plato_latin_utf16le.length, + (void *)&plato_latin_output.data, &plato_latin_output.length), + "conversion of UTF16 latin charset greek to unix charset UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF16 to UTF8 incorrect"); + + torture_assert_int_equal(tctx, + strlen_m_ext((const char *)plato_latin_output.data, + CH_UTF8, CH_UTF16LE), + plato_latin_utf16le.length / 2, + "checking strlen_m_ext UTF16 latin charset greek to UTF8"); + torture_assert(tctx, convert_string_talloc(tctx, + CH_UTF8, CH_UTF16LE, + plato_latin_utf8.data, plato_latin_utf8.length, + (void *)&plato_latin_output.data, &plato_latin_output.length), + "conversion of UTF16 latin charset greek to UTF16LE failed"); + torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf16le, "conversion from UTF8 to UTF16LE incorrect"); + + return true; +} + +static bool test_gd_case(struct 
torture_context *tctx) +{ + DATA_BLOB gd_utf8 = base64_decode_data_blob(gd_utf8_base64); + char *gd_unix; + size_t gd_size; + char *gd_lower, *gd_upper; + talloc_steal(tctx, gd_utf8.data); + + torture_assert(tctx, convert_string_talloc(tctx, CH_UTF8, CH_UNIX, + gd_utf8.data, gd_utf8.length, + (void *)&gd_unix, &gd_size), + "conversion of unix charset to UTF8"); + + gd_lower = strlower_talloc(tctx, gd_unix); + torture_assert(tctx, gd_lower, "failed to convert GD's name into lower case"); + gd_upper = strupper_talloc_n(tctx, gd_unix, gd_size); + torture_assert(tctx, gd_lower, "failed to convert GD's name into upper case"); + + torture_assert(tctx, + strhasupper(gd_unix), + "GD's name has an upper case character"); + torture_assert(tctx, + strhaslower(gd_unix), + "GD's name has an lower case character"); + torture_assert(tctx, + strhasupper(gd_upper), + "upper case name has an upper case character"); + torture_assert(tctx, + strhaslower(gd_lower), + "lower case name has an lower case character"); + torture_assert(tctx, + strhasupper(gd_lower) == false, + "lower case name has no upper case character"); + torture_assert(tctx, + strhaslower(gd_upper) == false, + "upper case name has no lower case character"); + + torture_assert(tctx, strcasecmp_m(gd_unix, + gd_upper) == 0, + "case insensitive comparison orig/upper"); + torture_assert(tctx, strcasecmp_m(gd_unix, + gd_lower) == 0, + "case insensitive comparison orig/lower"); + torture_assert(tctx, strcasecmp_m(gd_upper, + gd_lower) == 0, + "case insensitive comparison upper/lower"); + + /* This string isn't different in length upper/lower, but just check the first 5 chars */ + torture_assert(tctx, strncasecmp_m(gd_unix, + gd_upper, 5) == 0, + "case insensitive comparison orig/upper"); + torture_assert(tctx, strncasecmp_m(gd_unix, + gd_lower, 5) == 0, + "case insensitive comparison orig/lower"); + torture_assert(tctx, strncasecmp_m(gd_upper, + gd_lower, 5) == 0, + "case insensitive comparison upper/lower"); + return true; 
+} + +static bool test_plato_case(struct torture_context *tctx) +{ + DATA_BLOB plato_utf8 = base64_decode_data_blob(plato_utf8_base64); + char *plato_unix; + size_t plato_length; + char *plato_lower, *plato_upper; + talloc_steal(tctx, plato_utf8.data); + + torture_assert(tctx, convert_string_talloc(tctx, CH_UTF8, CH_UNIX, + plato_utf8.data, plato_utf8.length, + (void *)&plato_unix, &plato_length), + "conversion of unix charset to UTF8"); + + torture_assert(tctx, + strhasupper(plato_unix), + "PLATO's apology has an upper case character"); + torture_assert(tctx, + strhaslower(plato_unix), + "PLATO's apology has an lower case character"); + plato_lower = strlower_talloc(tctx, plato_unix); + torture_assert(tctx, plato_lower, "failed to convert PLATO's apology into lower case"); + plato_upper = strupper_talloc_n(tctx, plato_unix, plato_utf8.length); + torture_assert(tctx, plato_lower, "failed to convert PLATO's apology into upper case"); + + torture_assert(tctx, + strhasupper(plato_upper), + "upper case string has an upper case character"); + torture_assert(tctx, + strhaslower(plato_lower), + "lower case string has an lower case character"); + torture_assert(tctx, + strhasupper(plato_lower) == false, + "lower case string has no upper case character"); + torture_assert(tctx, + strhaslower(plato_upper) == false, + "upper case string has no lower case character"); + + torture_assert(tctx, strcasecmp_m(plato_unix, + plato_upper) == 0, + "case insensitive comparison orig/upper"); + torture_assert(tctx, strcasecmp_m(plato_unix, + plato_lower) == 0, + "case insensitive comparison orig/lower"); + torture_assert(tctx, strcasecmp_m(plato_upper, + plato_lower) == 0, + "case insensitive comparison upper/lower"); + return true; +} + +struct torture_suite *torture_local_convert_string_handle(TALLOC_CTX *mem_ctx) +{ + struct torture_suite *suite = torture_suite_create(mem_ctx, "convert_string_handle"); + torture_suite_add_simple_test(suite, "cp850 high points", 
test_cp850_high_points); + + torture_suite_add_simple_test(suite, "gd_ascii", test_gd_ascii_handle); + torture_suite_add_simple_test(suite, "gd_minus_1", test_gd_minus_1_handle); + torture_suite_add_simple_test(suite, "gd_iso8859_cp850", test_gd_iso8859_cp850_handle); + torture_suite_add_simple_test(suite, "plato_english_iso8859_cp850", test_plato_english_iso8859_cp850_handle); + torture_suite_add_simple_test(suite, "plato_english_minus_1", test_plato_english_minus_1_handle); + torture_suite_add_simple_test(suite, "plato_cp850_utf8", test_plato_cp850_utf8_handle); + torture_suite_add_simple_test(suite, "plato_minus_1", test_plato_minus_1_handle); + torture_suite_add_simple_test(suite, "plato_latin_cp850_utf8", test_plato_latin_cp850_utf8_handle); + torture_suite_add_simple_test(suite, "utf8-nfc-to-nfd", test_utf8_nfc_to_nfd); + torture_suite_add_simple_test(suite, "utf8-nfc-to-nfd-overflow", test_utf8_nfc_to_nfd_overflow); + torture_suite_add_simple_test(suite, "utf8-nfd-to-nfc", test_utf8_nfd_to_nfc); + return suite; +} + +struct torture_suite *torture_local_string_case_handle(TALLOC_CTX *mem_ctx) +{ + struct torture_suite *suite = torture_suite_create(mem_ctx, "string_case_handle"); + + torture_suite_add_simple_test(suite, "gd_case_utf8", test_gd_case_utf8_handle); + torture_suite_add_simple_test(suite, "gd_case_cp850", test_gd_case_cp850_handle); + torture_suite_add_simple_test(suite, "plato_case_utf8", test_plato_case_utf8_handle); + return suite; +} + +struct torture_suite *torture_local_convert_string(TALLOC_CTX *mem_ctx) +{ + struct torture_suite *suite = torture_suite_create(mem_ctx, "convert_string"); + + torture_suite_add_simple_test(suite, "short_strings", test_short_strings); + torture_suite_add_simple_test(suite, "gd", test_gd); + torture_suite_add_simple_test(suite, "plato", test_plato); + torture_suite_add_simple_test(suite, "plato_latin", test_plato_latin); + return suite; +} + +struct torture_suite *torture_local_string_case(TALLOC_CTX *mem_ctx) +{ 
+ struct torture_suite *suite = torture_suite_create(mem_ctx, "string_case_handle"); + + torture_suite_add_simple_test(suite, "gd_case", test_gd_case); + torture_suite_add_simple_test(suite, "plato_case", test_plato_case); + return suite; +} diff --git a/lib/util/charset/tests/iconv.c b/lib/util/charset/tests/iconv.c new file mode 100644 index 0000000..3733c3c --- /dev/null +++ b/lib/util/charset/tests/iconv.c @@ -0,0 +1,495 @@ +/* + Unix SMB/CIFS implementation. + + local testing of iconv routines. This tests the system iconv code against + the built-in iconv code + + Copyright (C) Andrew Tridgell 2004 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "includes.h" +#include "torture/torture.h" +#include "system/iconv.h" +#include "system/time.h" +#include "libcli/raw/libcliraw.h" +#include "param/param.h" +#include "torture/util.h" +#include "torture/local/proto.h" +#include "talloc.h" + +#ifdef HAVE_NATIVE_ICONV + +static bool iconv_untestable(struct torture_context *tctx) +{ + iconv_t cd; + + cd = iconv_open("UTF-16LE", "UCS-4LE"); + if (cd == (iconv_t)-1) + torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> UCS-4LE"); + iconv_close(cd); + + cd = iconv_open("UTF-16LE", "CP850"); + if (cd == (iconv_t)-1) + torture_skip(tctx, "unable to test - system iconv library does not support UTF-16LE -> CP850\n"); + iconv_close(cd); + + return false; +} + +/* + generate a UTF-16LE buffer for a given unicode codepoint +*/ +static int gen_codepoint_utf16(unsigned int codepoint, + char *buf, size_t *size) +{ + static iconv_t cd; + uint8_t in[4]; + char *ptr_in; + size_t size_in, size_out, ret; + if (!cd) { + cd = iconv_open("UTF-16LE", "UCS-4LE"); + if (cd == (iconv_t)-1) { + cd = NULL; + return -1; + } + } + + in[0] = codepoint & 0xFF; + in[1] = (codepoint>>8) & 0xFF; + in[2] = (codepoint>>16) & 0xFF; + in[3] = (codepoint>>24) & 0xFF; + + ptr_in = (char *)in; + size_in = 4; + size_out = 8; + + ret = iconv(cd, &ptr_in, &size_in, &buf, &size_out); + + *size = 8 - size_out; + + return ret; +} + + +/* + work out the unicode codepoint of the first UTF-8 character in the buffer +*/ +static unsigned int get_codepoint(char *buf, size_t size, const char *charset) +{ + iconv_t cd; + uint8_t out[4]; + char *ptr_out; + size_t size_out, size_in, ret; + + cd = iconv_open("UCS-4LE", charset); + + size_in = size; + ptr_out = (char *)out; + size_out = sizeof(out); + memset(out, 0, sizeof(out)); + + ret = iconv(cd, &buf, &size_in, &ptr_out, &size_out); + iconv_close(cd); + if (ret == (size_t) -1) { + return (unsigned int)-1; + } + + return out[0] | (out[1]<<8) | (out[2]<<16) | (out[3]<<24); 
+} + +/* + display a buffer with name prefix +*/ +static void show_buf(const char *name, uint8_t *buf, size_t size) +{ + int i; + printf("%s ", name); + for (i=0;i<size;i++) { + printf("%02x ", buf[i]); + } + printf("\n"); +} + +/* + given a UTF-16LE buffer, test the system and built-in iconv code to + make sure they do exactly the same thing in converting the buffer to + "charset", then convert it back again and ensure we get the same + buffer back +*/ +static bool test_buffer(struct torture_context *test, + uint8_t *inbuf, size_t size, const char *charset) +{ + uint8_t buf1[1000], buf2[1000], buf3[1000]; + size_t outsize1, outsize2, outsize3; + const char *ptr_in1; + char *ptr_in2; + char *ptr_out; + size_t size_in1, size_in2, size_in3; + size_t ret1, ret2, ret3, len1, len2; + int errno1, errno2; + static iconv_t cd; + static smb_iconv_t cd2, cd3; + static const char *last_charset; + + if (cd && last_charset) { + iconv_close(cd); + smb_iconv_close(cd2); + smb_iconv_close(cd3); + cd = NULL; + } + + if (!cd) { + cd = iconv_open(charset, "UTF-16LE"); + if (cd == (iconv_t)-1) { + torture_fail(test, + talloc_asprintf(test, + "failed to open %s to UTF-16LE", + charset)); + } + cd2 = smb_iconv_open_ex(test, charset, "UTF-16LE", lpcfg_parm_bool(test->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + if (cd2 == (iconv_t)-1) { + torture_fail(test, + talloc_asprintf(test, + "failed to open %s to UTF-16LE via smb_iconv_open_ex", + charset)); + } + cd3 = smb_iconv_open_ex(test, "UTF-16LE", charset, lpcfg_parm_bool(test->lp_ctx, NULL, "iconv", "use_builtin_handlers", true)); + if (cd3 == (iconv_t)-1) { + torture_fail(test, + talloc_asprintf(test, + "failed to open UTF-16LE to %s via smb_iconv_open_ex", + charset)); + } + last_charset = charset; + } + + /* internal convert to charset - placing result in buf1 */ + ptr_in1 = (const char *)inbuf; + ptr_out = (char *)buf1; + size_in1 = size; + outsize1 = sizeof(buf1); + + memset(ptr_out, 0, outsize1); + errno = 0; + ret1 = 
smb_iconv(cd2, &ptr_in1, &size_in1, &ptr_out, &outsize1); + errno1 = errno; + + /* system convert to charset - placing result in buf2 */ + ptr_in2 = (char *)inbuf; + ptr_out = (char *)buf2; + size_in2 = size; + outsize2 = sizeof(buf2); + + memset(ptr_out, 0, outsize2); + errno = 0; + ret2 = iconv(cd, &ptr_in2, &size_in2, &ptr_out, &outsize2); + errno2 = errno; + + len1 = sizeof(buf1) - outsize1; + len2 = sizeof(buf2) - outsize2; + + /* codepoints above 1M are not interesting for now */ + if (len2 > len1 && + memcmp(buf1, buf2, len1) == 0 && + get_codepoint((char *)(buf2+len1), len2-len1, charset) >= (1<<20)) { + return true; + } + if (len1 > len2 && + memcmp(buf1, buf2, len2) == 0 && + get_codepoint((char *)(buf1+len2), len1-len2, charset) >= (1<<20)) { + return true; + } + + torture_assert_int_equal(test, ret1, ret2, "ret mismatch"); + + if (errno1 != errno2) { + show_buf(" rem1:", inbuf+(size-size_in1), size_in1); + show_buf(" rem2:", inbuf+(size-size_in2), size_in2); + torture_fail(test, talloc_asprintf(test, + "errno mismatch with %s internal=%d/%s system=%d/%s", + charset, + errno1, strerror(errno1), + errno2, strerror(errno2))); + } + + torture_assert_int_equal(test, outsize1, outsize2, "outsize mismatch"); + + torture_assert_int_equal(test, size_in1, size_in2, "size_in mismatch"); + + if (len1 != len2 || + memcmp(buf1, buf2, len1) != 0) { + torture_comment(test, "size=%d ret1=%d ret2=%d", (int)size, (int)ret1, (int)ret2); + show_buf(" IN1:", inbuf, size-size_in1); + show_buf(" IN2:", inbuf, size-size_in2); + show_buf("OUT1:", buf1, len1); + show_buf("OUT2:", buf2, len2); + if (len2 > len1 && memcmp(buf1, buf2, len1) == 0) { + torture_comment(test, "next codepoint is %u", + get_codepoint((char *)(buf2+len1), len2-len1, charset)); + } + if (len1 > len2 && memcmp(buf1, buf2, len2) == 0) { + torture_comment(test, "next codepoint is %u", + get_codepoint((char *)(buf1+len2),len1-len2, charset)); + } + + torture_fail(test, "failed"); + } + + /* convert back to 
UTF-16, putting result in buf3 */ + size = size - size_in1; + ptr_in1 = (const char *)buf1; + ptr_out = (char *)buf3; + size_in3 = len1; + outsize3 = sizeof(buf3); + + memset(ptr_out, 0, outsize3); + ret3 = smb_iconv(cd3, &ptr_in1, &size_in3, &ptr_out, &outsize3); + + /* we only internally support the first 1M codepoints */ + if (outsize3 != sizeof(buf3) - size && + get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3), + size - (sizeof(buf3) - outsize3), + "UTF-16LE") >= (1<<20)) { + return true; + } + + torture_assert_int_equal(test, ret3, 0, talloc_asprintf(test, + "pull failed - %s", strerror(errno))); + + if (strncmp(charset, "UTF", 3) != 0) { + /* don't expect perfect mappings for non UTF charsets */ + return true; + } + + + torture_assert_int_equal(test, outsize3, sizeof(buf3) - size, + "wrong outsize3"); + + if (memcmp(buf3, inbuf, size) != 0) { + torture_comment(test, "pull bytes mismatch:"); + show_buf("inbuf", inbuf, size); + show_buf(" buf3", buf3, sizeof(buf3) - outsize3); + torture_comment(test, "next codepoint is %u\n", + get_codepoint((char *)(inbuf+sizeof(buf3) - outsize3), + size - (sizeof(buf3) - outsize3), + "UTF-16LE")); + torture_fail(test, ""); + } + + return true; +} + + +/* + test the push_codepoint() and next_codepoint() functions for a given + codepoint +*/ +static bool test_codepoint(struct torture_context *tctx, unsigned int codepoint) +{ + uint8_t buf[10]; + size_t size, size2; + codepoint_t c; + + size = push_codepoint_handle(lpcfg_iconv_handle(tctx->lp_ctx), (char *)buf, codepoint); + torture_assert(tctx, size != -1 || (codepoint >= 0xd800 && codepoint <= 0x10000), + "Invalid Codepoint range"); + + if (size == -1) return true; + + buf[size] = random(); + buf[size+1] = random(); + buf[size+2] = random(); + buf[size+3] = random(); + + c = next_codepoint_handle(lpcfg_iconv_handle(tctx->lp_ctx), (char *)buf, &size2); + + torture_assert(tctx, c == codepoint, + talloc_asprintf(tctx, + "next_codepoint(%u) failed - gave %u", codepoint, c)); 
+ + torture_assert(tctx, size2 == size, + talloc_asprintf(tctx, "next_codepoint(%u) gave wrong size %d (should be %d)\n", + codepoint, (int)size2, (int)size)); + + return true; +} + +static bool test_next_codepoint(struct torture_context *tctx) +{ + unsigned int codepoint; + if (iconv_untestable(tctx)) + return true; + + for (codepoint=0;codepoint<(1<<20);codepoint++) { + if (!test_codepoint(tctx, codepoint)) + return false; + } + return true; +} + +static bool test_first_1m(struct torture_context *tctx) +{ + unsigned int codepoint; + size_t size; + unsigned char inbuf[1000]; + + if (iconv_untestable(tctx)) + return true; + + for (codepoint=0;codepoint<(1<<20);codepoint++) { + if (gen_codepoint_utf16(codepoint, (char *)inbuf, &size) != 0) { + continue; + } + + if (codepoint % 1000 == 0) { + if (torture_setting_bool(tctx, "progress", true)) { + torture_comment(tctx, "codepoint=%u \r", codepoint); + fflush(stdout); + } + } + + if (!test_buffer(tctx, inbuf, size, "UTF-8")) + return false; + } + return true; +} + +static bool test_random_5m(struct torture_context *tctx) +{ + unsigned char inbuf[1000]; + unsigned int i; + + if (iconv_untestable(tctx)) + return true; + + for (i=0;i<500000;i++) { + size_t size; + unsigned int c; + + if (i % 1000 == 0) { + if (torture_setting_bool(tctx, "progress", true)) { + torture_comment(tctx, "i=%u \r", i); + fflush(stdout); + } + } + + size = random() % 100; + for (c=0;c<size;c++) { + if (random() % 100 < 80) { + inbuf[c] = random() % 128; + } else { + inbuf[c] = random(); + } + if (random() % 10 == 0) { + inbuf[c] |= 0xd8; + } + if (random() % 10 == 0) { + inbuf[c] |= 0xdc; + } + } + if (!test_buffer(tctx, inbuf, size, "UTF-8")) { + printf("i=%d failed UTF-8\n", i); + return false; + } + + if (!test_buffer(tctx, inbuf, size, "CP850")) { + printf("i=%d failed CP850\n", i); + return false; + } + } + return true; +} + + +static bool test_string2key(struct torture_context *tctx) +{ + uint16_t *buf; + char *dest = NULL; + TALLOC_CTX 
*mem_ctx = talloc_new(tctx); + size_t len = (random()%1000)+1; + const uint16_t in1[10] = { 'a', 0xd805, 'b', 0xdcf0, 'c', 0, 'd', 'e', 'f', 'g' }; + uint8_t le1[20]; + uint8_t *munged1; + uint8_t *out1; + size_t ret; + int i; + const char *correct = "a\357\277\275b\357\277\275c\001defg"; + + buf = talloc_size(mem_ctx, len*2); + generate_random_buffer((uint8_t *)buf, len*2); + + torture_comment(tctx, "converting random buffer\n"); + + if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)buf, len*2, (void**)&dest, &ret)) { + torture_fail(tctx, "Failed to convert random buffer\n"); + } + + for (i=0;i<10;i++) { + SSVAL(&le1[2*i], 0, in1[i]); + } + + torture_comment(tctx, "converting fixed buffer to UTF16\n"); + + if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF16, (void *)le1, 20, (void**)&munged1, &ret)) { + torture_fail(tctx, "Failed to convert fixed buffer to UTF16_MUNGED\n"); + } + + torture_assert(tctx, ret == 20, "conversion should give 20 bytes\n"); + + torture_comment(tctx, "converting fixed buffer to UTF8\n"); + + if (!convert_string_talloc(mem_ctx, CH_UTF16MUNGED, CH_UTF8, (void *)le1, 20, (void**)&out1, &ret)) { + torture_fail(tctx, "Failed to convert fixed buffer to UTF8\n"); + } + + torture_assert(tctx, strcmp(correct, (const char *) out1) == 0, + "conversion gave incorrect result\n"); + + talloc_free(mem_ctx); + + return true; +} + +struct torture_suite *torture_local_iconv(TALLOC_CTX *mem_ctx) +{ + struct torture_suite *suite = torture_suite_create(mem_ctx, "iconv"); + + torture_suite_add_simple_test(suite, "string2key", + test_string2key); + + torture_suite_add_simple_test(suite, "next_codepoint()", + test_next_codepoint); + + torture_suite_add_simple_test(suite, "first 1M codepoints", + test_first_1m); + + torture_suite_add_simple_test(suite, "5M random UTF-16LE sequences", + test_random_5m); + + torture_suite_add_simple_test(suite, "string2key", + test_string2key); + return suite; +} + +#else + +struct torture_suite 
*torture_local_iconv(TALLOC_CTX *mem_ctx) +{ + printf("No native iconv library - can't run iconv test\n"); + return NULL; +} + +#endif diff --git a/lib/util/charset/tests/util_unistr.c b/lib/util/charset/tests/util_unistr.c new file mode 100644 index 0000000..1a9fcaa --- /dev/null +++ b/lib/util/charset/tests/util_unistr.c @@ -0,0 +1,166 @@ +/* + Unix SMB/CIFS implementation. + test suite for the util_unistr utility functions + + Copyright (C) Catalyst.Net Ltd. 2023 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "includes.h" +#include "torture/torture.h" + +#undef strcasecmp +#undef strncasecmp + +struct torture_suite *torture_local_util_unistr(TALLOC_CTX *mem_ctx); + +static bool test_utf16_len(struct torture_context *tctx) +{ + static const uint16_t empty_string[] = {'\0'}; + static const uint16_t foo_bar[] = { + 'f', 'o', 'o', ' ', 'b', 'a', 'r', '\0'}; + static const uint16_t foo_bar_alternative[] = {0xd83c, + 0xdd75, + 0xd83c, + 0xdd7e, + 0xd83c, + 0xdd7e, + ' ', + 0xd83c, + 0xdd31, + 0xd83c, + 0xdd30, + 0xd83c, + 0xdd41, + '\0'}; + + torture_assert_size_equal(tctx, + utf16_len(empty_string), + 0, + "length of empty string"); + torture_assert_size_equal(tctx, + utf16_null_terminated_len(empty_string), + 2, + "null‐terminated length of empty string"); + torture_assert_size_equal(tctx, + utf16_len(foo_bar), + 14, + "length of “foo bar”"); + torture_assert_size_equal(tctx, + utf16_null_terminated_len(foo_bar), + 16, + "null‐terminated length of “foo bar”"); + torture_assert_size_equal(tctx, + utf16_len(foo_bar_alternative), + 26, + "length of “🅵🅾🅾 🄱🄰🅁”"); + torture_assert_size_equal(tctx, + utf16_null_terminated_len( + foo_bar_alternative), + 28, + "null‐terminated length of “🅵🅾🅾 🄱🄰🅁”"); + + return true; +} + +static bool test_utf16_len_n(struct torture_context *tctx) +{ + static const uint16_t empty_string[] = {'\0'}; + static const uint16_t foo_bar[] = {'f', 'o', 'o', ' ', 'b', 'a', 'r'}; + static const uint16_t null_terminated_foo_bar[] = { + 'f', 'o', 'o', ' ', 'b', 'a', 'r', '\0'}; + static const uint16_t twice_null_terminated_abc[] = { + 'a', 'b', 'c', '\0', '\0'}; + + torture_assert_size_equal(tctx, + utf16_len_n(empty_string, 0), + 0, + "length of empty string"); + torture_assert_size_equal(tctx, + utf16_null_terminated_len_n(empty_string, 0), + 0, + "null‐terminated length of empty string"); + + torture_assert_size_equal(tctx, + utf16_len_n(empty_string, + sizeof empty_string), + 0, + "length of null‐terminated empty string"); + 
torture_assert_size_equal( + tctx, + utf16_null_terminated_len_n(empty_string, sizeof empty_string), + 2, + "null‐terminated length of null‐terminated empty string"); + + torture_assert_size_equal(tctx, + utf16_len_n(foo_bar, sizeof foo_bar), + 14, + "length of “foo bar”"); + torture_assert_size_equal(tctx, + utf16_null_terminated_len_n(foo_bar, + sizeof foo_bar), + 14, + "null‐terminated length of “foo bar”"); + + torture_assert_size_equal(tctx, + utf16_len_n(null_terminated_foo_bar, + sizeof null_terminated_foo_bar), + 14, + "length of null‐terminated “foo bar”"); + torture_assert_size_equal( + tctx, + utf16_null_terminated_len_n(null_terminated_foo_bar, + sizeof null_terminated_foo_bar), + 16, + "null‐terminated length of null‐terminated “foo bar”"); + + torture_assert_size_equal(tctx, + utf16_len_n(null_terminated_foo_bar, + sizeof null_terminated_foo_bar - + 1), + 14, + "length of “foo bar” minus one byte"); + torture_assert_size_equal( + tctx, + utf16_null_terminated_len_n(null_terminated_foo_bar, + sizeof null_terminated_foo_bar - 1), + 14, + "null‐terminated length of “foo bar” minus one byte"); + + torture_assert_size_equal(tctx, + utf16_len_n(twice_null_terminated_abc, + sizeof twice_null_terminated_abc), + 6, + "length of twice–null‐terminated “abc”"); + torture_assert_size_equal( + tctx, + utf16_null_terminated_len_n(twice_null_terminated_abc, + sizeof twice_null_terminated_abc), + 8, + "null‐terminated length of twice–null‐terminated “abc”"); + + return true; +} + +struct torture_suite *torture_local_util_unistr(TALLOC_CTX *mem_ctx) +{ + struct torture_suite *suite = torture_suite_create(mem_ctx, + "util_unistr"); + + torture_suite_add_simple_test(suite, "utf16_len", test_utf16_len); + torture_suite_add_simple_test(suite, "utf16_len_n", test_utf16_len_n); + + return suite; +} |