From 0915b3ef56dfac3113cce55a59a5765dc94976be Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 28 Apr 2024 14:34:54 +0200 Subject: Adding upstream version 2.13.6. Signed-off-by: Daniel Baumann --- .../utf8cpp/test_drivers/smoke_test/test.cpp | 298 +++++++++++++++++++++ 1 file changed, 298 insertions(+) create mode 100644 third-party/utf8cpp/test_drivers/smoke_test/test.cpp (limited to 'third-party/utf8cpp/test_drivers/smoke_test/test.cpp') diff --git a/third-party/utf8cpp/test_drivers/smoke_test/test.cpp b/third-party/utf8cpp/test_drivers/smoke_test/test.cpp new file mode 100644 index 0000000..4f9fb04 --- /dev/null +++ b/third-party/utf8cpp/test_drivers/smoke_test/test.cpp @@ -0,0 +1,298 @@ +#include +#include +#include +#include "../../source/utf8.h" +using namespace utf8; +using namespace std; + +int main() +{ + //append + unsigned char u[5] = {0,0,0,0,0}; + + append(0x0448, u); + assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0); + + append(0x65e5, u); + assert (u[0] == 0xe6 && u[1] == 0x97 && u[2] == 0xa5 && u[3] == 0 && u[4] == 0); + + append(0x3044, u); + assert (u[0] == 0xe3 && u[1] == 0x81 && u[2] == 0x84 && u[3] == 0 && u[4] == 0); + + append(0x10346, u); + assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0); + + + //next + const char* twochars = "\xe6\x97\xa5\xd1\x88"; + const char* w = twochars; + int cp = next(w, twochars + 6); + assert (cp == 0x65e5); + assert (w == twochars + 3); + + const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88"; + w = threechars; + cp = next(w, threechars + 9); + assert (cp == 0x10346); + assert (w == threechars + 4); + cp = next(w, threechars + 9); + assert (cp == 0x65e5); + assert (w == threechars + 7); + cp = next(w, threechars + 9); + assert (cp == 0x0448); + assert (w == threechars + 9); + + //peek_next + const char* const cw = twochars; + cp = peek_next(cw, cw + 6); + assert (cp == 0x65e5); + assert (cw == twochars); + + //prior + w = twochars + 3; + cp = prior (w, twochars); + assert (cp == 0x65e5); + assert (w == twochars); + + w = threechars + 9; + cp = prior(w, threechars); + assert (cp == 0x0448); + assert (w == threechars + 7); + cp = prior(w, threechars); + assert (cp == 0x65e5); + assert (w == threechars + 4); + cp = prior(w, threechars); + assert (cp == 0x10346); + assert (w == threechars); + + //previous (deprecated) + w = twochars + 3; + cp = previous (w, twochars - 1); + assert (cp == 0x65e5); + assert (w == twochars); + + w = threechars + 9; + cp = previous(w, threechars - 1); + assert (cp == 0x0448); + assert (w == threechars + 7); + cp = previous(w, threechars -1); + assert (cp == 0x65e5); + assert (w == threechars + 4); + cp = previous(w, threechars - 1); + assert (cp == 0x10346); + assert (w == threechars); + + // advance + w = twochars; + advance (w, 2, twochars + 6); + assert (w == twochars + 5); + + // distance + size_t dist = utf8::distance(twochars, twochars + 5); + assert (dist == 2); + + // utf32to8 + int utf32string[] = {0x448, 0x65E5, 0x10346, 0}; + vector utf8result; + utf32to8(utf32string, utf32string + 3, back_inserter(utf8result)); + assert (utf8result.size() == 9); + // try it with the return value; + char* utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]); + assert (utf8_end == &utf8result[0] + 9); + + //utf8to32 + vector utf32result; + utf8to32(twochars, twochars + 5, back_inserter(utf32result)); + assert (utf32result.size() == 2); + // try it with the return value; + int* utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]); + assert (utf32_end == &utf32result[0] + 2); + + //utf16to8 + unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e}; + utf8result.clear(); + utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); + assert (utf8result.size() == 10); + // try it with the return value; + utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]); + assert (utf8_end == &utf8result[0] + 10); + + //utf8to16 + char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"; + vector utf16result; + utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); + assert (utf16result.size() == 4); + assert (utf16result[2] == 0xd834); + assert (utf16result[3] == 0xdd1e); + // try it with the return value; + unsigned short* utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]); + assert (utf16_end == &utf16result[0] + 4); + + //find_invalid + char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa"; + char* invalid = find_invalid(utf_invalid, utf_invalid + 6); + assert (invalid == utf_invalid + 5); + + //is_valid + bool bvalid = is_valid(utf_invalid, utf_invalid + 6); + assert (bvalid == false); + bvalid = is_valid(utf8_with_surrogates, utf8_with_surrogates + 9); + assert (bvalid == true); + + //starts_with_bom + unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf}; + bool bbom = starts_with_bom(byte_order_mark, byte_order_mark + sizeof(byte_order_mark)); + assert (bbom == true); + bool no_bbom = starts_with_bom(threechars, threechars + sizeof(threechars)); + assert (no_bbom == false); + + //is_bom + bool unsafe_bbom = is_bom(byte_order_mark); + assert (unsafe_bbom == true); + + + //replace_invalid + char invalid_sequence[] = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z"; + vector replace_invalid_result; + replace_invalid (invalid_sequence, invalid_sequence + sizeof(invalid_sequence), std::back_inserter(replace_invalid_result), '?'); + bvalid = is_valid(replace_invalid_result.begin(), replace_invalid_result.end()); + assert (bvalid); + const char fixed_invalid_sequence[] = "a????z"; + assert (sizeof(fixed_invalid_sequence) == replace_invalid_result.size()); + assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.begin() + sizeof(fixed_invalid_sequence), fixed_invalid_sequence)); + + // iterator + utf8::iterator it(threechars, threechars, threechars + 9); + utf8::iterator it2 = it; + assert (it2 == it); + assert (*it == 0x10346); + assert (*(++it) == 0x65e5); + assert ((*it++) == 0x65e5); + assert (*it == 0x0448); + assert (it != it2); + utf8::iterator endit (threechars + 9, threechars, threechars + 9); + assert (++it == endit); + assert (*(--it) == 0x0448); + assert ((*it--) == 0x0448); + assert (*it == 0x65e5); + assert (--it == utf8::iterator(threechars, threechars, threechars + 9)); + assert (*it == 0x10346); + + ////////////////////////////////////////////////////////// + //// Unchecked variants + ////////////////////////////////////////////////////////// + + //append + memset(u, 0, 5); + append(0x0448, u); + assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0); + + append(0x65e5, u); + assert (u[0] == 0xe6 && u[1] == 0x97 && u[2] == 0xa5 && u[3] == 0 && u[4] == 0); + + append(0x10346, u); + assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0); + + //next + w = twochars; + cp = unchecked::next(w); + assert (cp == 0x65e5); + assert (w == twochars + 3); + + w = threechars; + cp = unchecked::next(w); + assert (cp == 0x10346); + assert (w == threechars + 4); + cp = unchecked::next(w); + assert (cp == 0x65e5); + assert (w == threechars + 7); + cp = unchecked::next(w); + assert (cp == 0x0448); + assert (w == threechars + 9); + + //peek_next + cp = unchecked::peek_next(cw); + assert (cp == 0x65e5); + assert (cw == twochars); + + + //previous (calls prior internally) + + w = twochars + 3; + cp = unchecked::previous (w); + assert (cp == 0x65e5); + assert (w == twochars); + + w = threechars + 9; + cp = unchecked::previous(w); + assert (cp == 0x0448); + assert (w == threechars + 7); + cp = unchecked::previous(w); + assert (cp == 0x65e5); + assert (w == threechars + 4); + cp = unchecked::previous(w); + assert (cp == 0x10346); + assert (w == threechars); + + // advance + w = twochars; + unchecked::advance (w, 2); + assert (w == twochars + 5); + + // distance + dist = unchecked::distance(twochars, twochars + 5); + assert (dist == 2); + + // utf32to8 + utf8result.clear(); + unchecked::utf32to8(utf32string, utf32string + 3, back_inserter(utf8result)); + assert (utf8result.size() == 9); + // try it with the return value; + utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]); + assert(utf8_end == &utf8result[0] + 9); + + //utf8to32 + utf32result.clear(); + unchecked::utf8to32(twochars, twochars + 5, back_inserter(utf32result)); + assert (utf32result.size() == 2); + // try it with the return value; + utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]); + assert (utf32_end == &utf32result[0] + 2); + + //utf16to8 + utf8result.clear(); + unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); + assert (utf8result.size() == 10); + // try it with the return value; + utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]); + assert (utf8_end == &utf8result[0] + 10); + + //utf8to16 + utf16result.clear(); + unchecked::utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); + assert (utf16result.size() == 4); + assert (utf16result[2] == 0xd834); + assert (utf16result[3] == 0xdd1e); + // try it with the return value; + utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]); + assert (utf16_end == &utf16result[0] + 4); + + // iterator + utf8::unchecked::iterator un_it(threechars); + utf8::unchecked::iterator un_it2 = un_it; + assert (un_it2 == un_it); + assert (*un_it == 0x10346); + assert (*(++un_it) == 0x65e5); + assert ((*un_it++) == 0x65e5); + assert (un_it != un_it2); + assert (*un_it == 0x0448); + utf8::unchecked::iterator un_endit (threechars + 9); + assert (++un_it == un_endit); + assert (*(--un_it) == 0x0448); + assert ((*un_it--) == 0x0448); + assert (*un_it == 0x65e5); + assert (--un_it == utf8::unchecked::iterator(threechars)); + assert (*un_it == 0x10346); +} + + -- cgit v1.2.3