diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 12:34:54 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 12:34:54 +0000 |
commit | 0915b3ef56dfac3113cce55a59a5765dc94976be (patch) | |
tree | a8fea11d50b4f083e1bf0f90025ece7f0824784a /third-party/utf8cpp/test_drivers | |
parent | Initial commit. (diff) | |
download | icinga2-upstream.tar.xz icinga2-upstream.zip |
Adding upstream version 2.13.6. (refs: upstream/2.13.6, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third-party/utf8cpp/test_drivers')
3 files changed, 511 insertions, 0 deletions
diff --git a/third-party/utf8cpp/test_drivers/negative/negative.cpp b/third-party/utf8cpp/test_drivers/negative/negative.cpp
new file mode 100644
index 0000000..0f1015d
--- /dev/null
+++ b/third-party/utf8cpp/test_drivers/negative/negative.cpp
@@ -0,0 +1,53 @@
+#include "../../source/utf8.h"
+using namespace utf8;
+
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <algorithm>
+using namespace std;
+// 1-based numbers of the test-file lines that are expected to contain invalid UTF-8.
+const unsigned INVALID_LINES[] = { 75, 76, 83, 84, 85, 93, 102, 103, 105, 106, 107, 108, 109, 110, 114, 115, 116, 117, 124, 125, 130, 135, 140, 145, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 169, 175, 176, 177, 207, 208, 209, 210, 211, 220, 221, 222, 223, 224, 232, 233, 234, 235, 236, 247, 248, 249, 250, 251, 252, 253, 257, 258, 259, 260, 261, 262, 263, 264};
+const unsigned* INVALID_LINES_END = INVALID_LINES + sizeof(INVALID_LINES)/sizeof(unsigned);
+// Usage: negative <test file>. Prints a message for every line whose validity differs from INVALID_LINES.
+int main(int argc, char** argv)
+{
+    string test_file_path;
+    if (argc == 2)
+        test_file_path = argv[1];
+    else {
+        cout << "Wrong number of arguments" << endl;
+        return 0; // plain return instead of exit(): no <cstdlib> needed, locals are destroyed normally
+    }
+    // Open the test file
+    ifstream fs8(test_file_path.c_str());
+    if (!fs8.is_open()) {
+        cout << "Could not open " << test_file_path << endl;
+        return 0;
+    }
+
+    // Read it line by line. getline() ends the loop at end of file or on a
+    // read error, so a trailing newline does not produce a phantom empty
+    // last line and a failed read cannot spin forever.
+    unsigned int line_count = 0;
+    string line;
+    while (getline(fs8, line)) {
+        line_count++;
+        // A line is expected to be valid unless its number is listed in
+        // INVALID_LINES.
+        bool expected_valid = (find(INVALID_LINES, INVALID_LINES_END, line_count) == INVALID_LINES_END);
+        // Print out lines that contain unexpected invalid UTF-8
+        if (!is_valid(line.begin(), line.end())) {
+            if (expected_valid)
+                cout << "Unexpected invalid utf-8 at line " << line_count << '\n';
+
+            // try fixing it:
+            string fixed_line;
+            replace_invalid(line.begin(), line.end(), back_inserter(fixed_line));
+            if (!is_valid(fixed_line.begin(), fixed_line.end()))
+                cout << "replace_invalid() resulted in an invalid utf-8 at line " << line_count << '\n';
+        }
+        else if (!expected_valid)
+            cout << "Invalid utf-8 NOT detected at line " << line_count << '\n';
+    }
+}
diff --git a/third-party/utf8cpp/test_drivers/smoke_test/test.cpp b/third-party/utf8cpp/test_drivers/smoke_test/test.cpp
new file mode 100644
index 0000000..4f9fb04
--- /dev/null
+++ b/third-party/utf8cpp/test_drivers/smoke_test/test.cpp
@@ -0,0 +1,298 @@
+#include <cstring>
+#include <cassert>
+#include <vector>
+#include "../../source/utf8.h"
+using namespace utf8;
+using namespace std;
+// Smoke test: calls each utf8:: function (checked, then unchecked) on fixed inputs; all checks are assert()s.
+int main()
+{
+    //append: each call overwrites u from the start, bytes past the sequence keep their old value
+    unsigned char u[5] = {0,0,0,0,0};
+
+    append(0x0448, u);
+    assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0);
+
+    append(0x65e5, u);
+    assert (u[0] == 0xe6 && u[1] == 0x97 && u[2] == 0xa5 && u[3] == 0 && u[4] == 0);
+
+    append(0x3044, u);
+    assert (u[0] == 0xe3 && u[1] == 0x81 && u[2] == 0x84 && u[3] == 0 && u[4] == 0);
+
+    append(0x10346, u);
+    assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0);
+
+
+    //next
+    const char* twochars = "\xe6\x97\xa5\xd1\x88"; // U+65E5 U+0448 (3 + 2 bytes)
+    const char* w = twochars;
+    int cp = next(w, twochars + 6);
+    assert (cp == 0x65e5);
+    assert (w == twochars + 3);
+
+    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88"; // U+10346 U+65E5 U+0448 (4 + 3 + 2 bytes)
+    w = threechars;
+    cp = next(w, threechars + 9);
+    assert (cp == 0x10346);
+    assert (w == threechars + 4);
+    cp = next(w, threechars + 9);
+    assert (cp == 0x65e5);
+    assert (w == threechars + 7);
+    cp = next(w, threechars + 9);
+    assert (cp == 0x0448);
+    assert (w == threechars + 9);
+
+    //peek_next: same result as next, but the iterator must not move
+    const char* const cw = twochars;
+    cp = peek_next(cw, cw + 6);
+    assert (cp == 0x65e5);
+    assert (cw == twochars);
+
+    //prior
+    w = twochars + 3;
+    cp = prior (w, twochars);
+    assert (cp == 0x65e5);
+    assert (w == twochars);
+
+    w = threechars + 9;
+    cp = prior(w, threechars);
+    assert (cp == 0x0448);
+    assert (w == threechars + 7);
+    cp = prior(w, threechars);
+    assert (cp == 0x65e5);
+    assert (w == threechars + 4);
+    cp = prior(w, threechars);
+    assert (cp == 0x10346);
+    assert (w == threechars);
+
+    //previous (deprecated)
+    w = twochars + 3;
+    cp = previous (w, twochars - 1);
+    assert (cp == 0x65e5);
+    assert (w == twochars);
+
+    w = threechars + 9;
+    cp = previous(w, threechars - 1);
+    assert (cp == 0x0448);
+    assert (w == threechars + 7);
+    cp = previous(w, threechars -1);
+    assert (cp == 0x65e5);
+    assert (w == threechars + 4);
+    cp = previous(w, threechars - 1);
+    assert (cp == 0x10346);
+    assert (w == threechars);
+
+    // advance
+    w = twochars;
+    advance (w, 2, twochars + 6);
+    assert (w == twochars + 5);
+
+    // distance
+    size_t dist = utf8::distance(twochars, twochars + 5);
+    assert (dist == 2);
+
+    // utf32to8
+    int utf32string[] = {0x448, 0x65E5, 0x10346, 0};
+    vector<char> utf8result;
+    utf32to8(utf32string, utf32string + 3, back_inserter(utf8result));
+    assert (utf8result.size() == 9);
+    // try it with the return value; the vector already holds 9 bytes, so writing in place is safe
+    char* utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]);
+    assert (utf8_end == &utf8result[0] + 9);
+
+    //utf8to32
+    vector<int> utf32result;
+    utf8to32(twochars, twochars + 5, back_inserter(utf32result));
+    assert (utf32result.size() == 2);
+    // try it with the return value;
+    int* utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]);
+    assert (utf32_end == &utf32result[0] + 2);
+
+    //utf16to8
+    unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
+    utf8result.clear();
+    utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
+    assert (utf8result.size() == 10);
+    // try it with the return value;
+    utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]);
+    assert (utf8_end == &utf8result[0] + 10);
+
+    //utf8to16
+    char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
+    vector <unsigned short> utf16result;
+    utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result));
+    assert (utf16result.size() == 4);
+    assert (utf16result[2] == 0xd834);
+    assert (utf16result[3] == 0xdd1e);
+    // try it with the return value;
+    unsigned short* utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]);
+    assert (utf16_end == &utf16result[0] + 4);
+
+    //find_invalid
+    char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa";
+    char* invalid = find_invalid(utf_invalid, utf_invalid + 6);
+    assert (invalid == utf_invalid + 5);
+
+    //is_valid
+    bool bvalid = is_valid(utf_invalid, utf_invalid + 6);
+    assert (bvalid == false);
+    bvalid = is_valid(utf8_with_surrogates, utf8_with_surrogates + 9);
+    assert (bvalid == true);
+
+    //starts_with_bom
+    unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf};
+    bool bbom = starts_with_bom(byte_order_mark, byte_order_mark + sizeof(byte_order_mark));
+    assert (bbom == true);
+    bool no_bbom = starts_with_bom(threechars, threechars + 9); // threechars is a pointer: sizeof would give the pointer size
+    assert (no_bbom == false);
+
+    //is_bom
+    bool unsafe_bbom = is_bom(byte_order_mark);
+    assert (unsafe_bbom == true);
+
+
+    //replace_invalid: both sizeof() ranges include the terminating NUL, so the sizes stay comparable
+    char invalid_sequence[] = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z";
+    vector<char> replace_invalid_result;
+    replace_invalid (invalid_sequence, invalid_sequence + sizeof(invalid_sequence), std::back_inserter(replace_invalid_result), '?');
+    bvalid = is_valid(replace_invalid_result.begin(), replace_invalid_result.end());
+    assert (bvalid);
+    const char fixed_invalid_sequence[] = "a????z";
+    assert (sizeof(fixed_invalid_sequence) == replace_invalid_result.size());
+    assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.begin() + sizeof(fixed_invalid_sequence), fixed_invalid_sequence));
+
+    // iterator
+    utf8::iterator<const char*> it(threechars, threechars, threechars + 9);
+    utf8::iterator<const char*> it2 = it;
+    assert (it2 == it);
+    assert (*it == 0x10346);
+    assert (*(++it) == 0x65e5);
+    assert ((*it++) == 0x65e5);
+    assert (*it == 0x0448);
+    assert (it != it2);
+    utf8::iterator<const char*> endit (threechars + 9, threechars, threechars + 9);
+    assert (++it == endit);
+    assert (*(--it) == 0x0448);
+    assert ((*it--) == 0x0448);
+    assert (*it == 0x65e5);
+    assert (--it == utf8::iterator<const char*>(threechars, threechars, threechars + 9));
+    assert (*it == 0x10346);
+
+    //////////////////////////////////////////////////////////
+    //// Unchecked variants
+    //////////////////////////////////////////////////////////
+
+    //append (unchecked overload, so this section really exercises utf8::unchecked)
+    memset(u, 0, 5);
+    unchecked::append(0x0448, u);
+    assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0);
+
+    unchecked::append(0x65e5, u);
+    assert (u[0] == 0xe6 && u[1] == 0x97 && u[2] == 0xa5 && u[3] == 0 && u[4] == 0);
+
+    unchecked::append(0x10346, u);
+    assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0);
+
+    //next
+    w = twochars;
+    cp = unchecked::next(w);
+    assert (cp == 0x65e5);
+    assert (w == twochars + 3);
+
+    w = threechars;
+    cp = unchecked::next(w);
+    assert (cp == 0x10346);
+    assert (w == threechars + 4);
+    cp = unchecked::next(w);
+    assert (cp == 0x65e5);
+    assert (w == threechars + 7);
+    cp = unchecked::next(w);
+    assert (cp == 0x0448);
+    assert (w == threechars + 9);
+
+    //peek_next
+    cp = unchecked::peek_next(cw);
+    assert (cp == 0x65e5);
+    assert (cw == twochars);
+
+
+    //previous (calls prior internally)
+
+    w = twochars + 3;
+    cp = unchecked::previous (w);
+    assert (cp == 0x65e5);
+    assert (w == twochars);
+
+    w = threechars + 9;
+    cp = unchecked::previous(w);
+    assert (cp == 0x0448);
+    assert (w == threechars + 7);
+    cp = unchecked::previous(w);
+    assert (cp == 0x65e5);
+    assert (w == threechars + 4);
+    cp = unchecked::previous(w);
+    assert (cp == 0x10346);
+    assert (w == threechars);
+
+    // advance
+    w = twochars;
+    unchecked::advance (w, 2);
+    assert (w == twochars + 5);
+
+    // distance
+    dist = unchecked::distance(twochars, twochars + 5);
+    assert (dist == 2);
+
+    // utf32to8
+    utf8result.clear();
+    unchecked::utf32to8(utf32string, utf32string + 3, back_inserter(utf8result));
+    assert (utf8result.size() == 9);
+    // try it with the return value;
+    utf8_end = unchecked::utf32to8(utf32string, utf32string + 3, &utf8result[0]);
+    assert(utf8_end == &utf8result[0] + 9);
+
+    //utf8to32
+    utf32result.clear();
+    unchecked::utf8to32(twochars, twochars + 5, back_inserter(utf32result));
+    assert (utf32result.size() == 2);
+    // try it with the return value;
+    utf32_end = unchecked::utf8to32(twochars, twochars + 5, &utf32result[0]);
+    assert (utf32_end == &utf32result[0] + 2);
+
+    //utf16to8
+    utf8result.clear();
+    unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
+    assert (utf8result.size() == 10);
+    // try it with the return value;
+    utf8_end = unchecked::utf16to8 (utf16string, utf16string + 5, &utf8result[0]);
+    assert (utf8_end == &utf8result[0] + 10);
+
+    //utf8to16
+    utf16result.clear();
+    unchecked::utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result));
+    assert (utf16result.size() == 4);
+    assert (utf16result[2] == 0xd834);
+    assert (utf16result[3] == 0xdd1e);
+    // try it with the return value;
+    utf16_end = unchecked::utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]);
+    assert (utf16_end == &utf16result[0] + 4);
+
+    // iterator
+    utf8::unchecked::iterator<const char*> un_it(threechars);
+    utf8::unchecked::iterator<const char*> un_it2 = un_it;
+    assert (un_it2 == un_it);
+    assert (*un_it == 0x10346);
+    assert (*(++un_it) == 0x65e5);
+    assert ((*un_it++) == 0x65e5);
+    assert (un_it != un_it2);
+    assert (*un_it == 0x0448);
+    utf8::unchecked::iterator<const char*> un_endit (threechars + 9);
+    assert (++un_it == un_endit);
+    assert (*(--un_it) == 0x0448);
+    assert ((*un_it--) == 0x0448);
+    assert (*un_it == 0x65e5);
+    assert (--un_it == utf8::unchecked::iterator<const char*>(threechars));
+    assert (*un_it == 0x10346);
+}
+
+
diff --git a/third-party/utf8cpp/test_drivers/utf8reader/utf8reader.cpp b/third-party/utf8cpp/test_drivers/utf8reader/utf8reader.cpp
new file mode 100644
index 0000000..c88a5ee
--- /dev/null
+++ b/third-party/utf8cpp/test_drivers/utf8reader/utf8reader.cpp
@@ -0,0 
+1,160 @@
+#include "../../source/utf8.h"
+using namespace utf8;
+
+#include <string>
+#include <iostream>
+#include <fstream>
+#include <vector>
+using namespace std;
+// Usage: utfreader filename. Round-trips each line through the checked and unchecked utf8:: functions and prints every mismatch.
+int main(int argc, char** argv)
+{
+    if (argc != 2) {
+        cout << "\nUsage: utfreader filename\n";
+        return 0;
+    }
+    const char* TEST_FILE_PATH = argv[1];
+    // Open the test file
+    ifstream fs8(TEST_FILE_PATH);
+    if (!fs8.is_open()) {
+        cout << "Could not open " << TEST_FILE_PATH << endl;
+        return 0;
+    }
+
+    // Read it line by line; getline() ends the loop at end of file or on a
+    // read error instead of relying on eof() being set after a failed get().
+    unsigned int line_count = 0;
+    string line;
+    while (getline(fs8, line)) {
+        line_count++;
+
+        // Keep only the valid prefix of the line: line_end is moved back to
+        // the first invalid byte (if any) and the 0-based offset of that byte
+        // from the start of the line is reported.
+        string::iterator line_start = line.begin();
+        string::iterator line_end = line.end();
+        line_end = find_invalid(line_start, line_end);
+        if (line_end != line.end())
+            cout << "Line " << line_count << ": Invalid utf-8 at byte " << int(line_end - line_start) << '\n';
+
+        // Convert it to utf-16
+        vector<unsigned short> utf16_line;
+        utf8to16(line_start, line_end, back_inserter(utf16_line));
+
+        // Back to utf-8 and compare it to the original line.
+        string back_to_utf8;
+        utf16to8(utf16_line.begin(), utf16_line.end(), back_inserter(back_to_utf8));
+        if (back_to_utf8.compare(string(line_start, line_end)) != 0)
+            cout << "Line " << line_count << ": Conversion to UTF-16 and back failed" << '\n';
+
+        // Now, convert it to utf-32, back to utf-8 and compare
+        vector <unsigned> utf32_line;
+        utf8to32(line_start, line_end, back_inserter(utf32_line));
+        back_to_utf8.clear();
+        utf32to8(utf32_line.begin(), utf32_line.end(), back_inserter(back_to_utf8));
+        if (back_to_utf8.compare(string(line_start, line_end)) != 0)
+            cout << "Line " << line_count << ": Conversion to UTF-32 and back failed" << '\n';
+
+        // Now, iterate and back
+        unsigned char_count = 0;
+        string::iterator it = line_start;
+        while (it != line_end) {
+            unsigned int next_cp = peek_next(it, line_end);
+            if (next(it, line_end) != next_cp)
+                cout << "Line " << line_count << ": Error: peek_next gave a different result than next" << '\n';
+            char_count++;
+        }
+        if (char_count != utf32_line.size())
+            cout << "Line " << line_count << ": Error in iterating with next - wrong number of characters" << '\n';
+
+        string::iterator adv_it = line_start;
+        utf8::advance(adv_it, char_count, line_end);
+        if (adv_it != line_end)
+            cout << "Line " << line_count << ": Error in advance function" << '\n';
+
+        if (string::size_type(utf8::distance(line_start, line_end)) != char_count)
+            cout << "Line " << line_count << ": Error in distance function" << '\n';
+
+        while (it != line_start) {
+            previous(it, line.rend().base());
+            char_count--;
+        }
+        if (char_count != 0)
+            cout << "Line " << line_count << ": Error in iterating with previous - wrong number of characters" << '\n';
+
+        // Try utf8::iterator
+        utf8::iterator<string::iterator> u8it(line_start, line_start, line_end);
+        if (!utf32_line.empty() && *u8it != utf32_line.at(0))
+            cout << "Line " << line_count << ": Error in utf::iterator * operator" << '\n';
+        if (std::distance(u8it, utf8::iterator<string::iterator>(line_end, line_start, line_end)) != static_cast<int>(utf32_line.size()))
+            cout << "Line " << line_count << ": Error in using utf::iterator with std::distance - wrong number of characters" << '\n';
+
+        std::advance(u8it, utf32_line.size());
+        if (u8it != utf8::iterator<string::iterator>(line_end, line_start, line_end))
+            cout << "Line " << line_count << ": Error in using utf::iterator with std::advance" << '\n';
+
+
+        //======================== Now, the unchecked versions ======================
+        // Convert it to utf-16 and compare to the checked version
+        vector<unsigned short> utf16_line_unchecked;
+        unchecked::utf8to16(line_start, line_end, back_inserter(utf16_line_unchecked));
+
+        if (utf16_line != utf16_line_unchecked)
+            cout << "Line " << line_count << ": Error in unchecked::utf8to16" << '\n';
+
+        // Back to utf-8 and compare it to the original line.
+        back_to_utf8.clear();
+        unchecked::utf16to8(utf16_line_unchecked.begin(), utf16_line_unchecked.end(), back_inserter(back_to_utf8));
+        if (back_to_utf8.compare(string(line_start, line_end)) != 0)
+            cout << "Line " << line_count << ": Unchecked conversion to UTF-16 and back failed" << '\n';
+
+        // Now, convert it to utf-32, back to utf-8 and compare
+        vector <unsigned> utf32_line_unchecked;
+        unchecked::utf8to32(line_start, line_end, back_inserter(utf32_line_unchecked));
+        if (utf32_line != utf32_line_unchecked)
+            cout << "Line " << line_count << ": Error in unchecked::utf8to32" << '\n';
+
+        back_to_utf8.clear();
+        unchecked::utf32to8(utf32_line_unchecked.begin(), utf32_line_unchecked.end(), back_inserter(back_to_utf8));
+        if (back_to_utf8.compare(string(line_start, line_end)) != 0)
+            cout << "Line " << line_count << ": Unchecked conversion to UTF-32 and back failed" << '\n';
+
+        // Now, iterate and back
+        char_count = 0;
+        it = line_start;
+        while (it != line_end) {
+            unsigned int next_cp = unchecked::peek_next(it);
+            if (unchecked::next(it) != next_cp)
+                cout << "Line " << line_count << ": Error: unchecked::peek_next gave a different result than unchecked::next" << '\n';
+            char_count++;
+        }
+        if (char_count != utf32_line.size())
+            cout << "Line " << line_count << ": Error in iterating with unchecked::next - wrong number of characters" << '\n';
+
+        adv_it = line_start;
+        utf8::unchecked::advance(adv_it, char_count);
+        if (adv_it != line_end)
+            cout << "Line " << line_count << ": Error in unchecked::advance function" << '\n';
+
+        if (string::size_type(utf8::unchecked::distance(line_start, line_end)) != char_count)
+            cout << "Line " << line_count << ": Error in unchecked::distance function" << '\n';
+
+        while (it != line_start) {
+            unchecked::previous(it);
+            char_count--;
+        }
+        if (char_count != 0)
+            cout << "Line " << line_count << ": Error in iterating with unchecked::previous - wrong number of characters" << '\n';
+
+        // Try utf8::unchecked::iterator
+        utf8::unchecked::iterator<string::iterator> un_u8it(line_start);
+        if (!utf32_line.empty() && *un_u8it != utf32_line.at(0))
+            cout << "Line " << line_count << ": Error in utf::unchecked::iterator * operator" << '\n';
+        if (std::distance(un_u8it, utf8::unchecked::iterator<string::iterator>(line_end)) != static_cast<int>(utf32_line.size()))
+            cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::distance - wrong number of characters" << '\n';
+
+        std::advance(un_u8it, utf32_line.size());
+        if (un_u8it != utf8::unchecked::iterator<string::iterator>(line_end))
+            cout << "Line " << line_count << ": Error in using utf::unchecked::iterator with std::advance" << '\n';
+    }
+}
|