summaryrefslogtreecommitdiffstats
path: root/src/boost/libs/detail/test/test_utf8_codecvt.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
commit483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
treee5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/boost/libs/detail/test/test_utf8_codecvt.cpp
parentInitial commit. (diff)
downloadceph-upstream.tar.xz
ceph-upstream.zip
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/boost/libs/detail/test/test_utf8_codecvt.cpp')
-rw-r--r--src/boost/libs/detail/test/test_utf8_codecvt.cpp302
1 files changed, 302 insertions, 0 deletions
diff --git a/src/boost/libs/detail/test/test_utf8_codecvt.cpp b/src/boost/libs/detail/test/test_utf8_codecvt.cpp
new file mode 100644
index 00000000..f92706c6
--- /dev/null
+++ b/src/boost/libs/detail/test/test_utf8_codecvt.cpp
@@ -0,0 +1,302 @@
+/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
+// test_utf8_codecvt.cpp
+
+// (C) Copyright 2002-4 Robert Ramey - http://www.rrsd.com .
+// Use, modification and distribution is subject to the Boost Software
+// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#include <algorithm> // std::copy, std::equal
+#include <exception> // std::exception
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <locale>
+#include <vector>
+#include <string>
+
+#include <cstddef> // size_t
+#include <cstdlib> // EXIT_SUCCESS
+#include <cwchar>
+#include <boost/config.hpp>
+#include <boost/core/no_exceptions_support.hpp>
+
+#define BOOST_UTF8_BEGIN_NAMESPACE namespace boost { namespace detail {
+#define BOOST_UTF8_END_NAMESPACE } }
+#include <boost/detail/utf8_codecvt_facet.hpp>
+#include <boost/detail/utf8_codecvt_facet.ipp>
+
+// When BOOST_NO_STDC_NAMESPACE is defined, re-export into namespace std the
+// C library names that this test refers to with a std:: qualifier
+// (std::size_t, std::wcslen, std::wint_t).
+#if defined(BOOST_NO_STDC_NAMESPACE)
+namespace std{
+    using ::size_t;
+    using ::wcslen;
+#if !defined(UNDER_CE) && !defined(__PGIC__)
+    // The wide-character integer type is spelled wint_t; the read loop in
+    // test_main declares its variable as std::wint_t.
+    using ::wint_t;
+#endif
+} // namespace std
+#endif
+
+// Note: copied from boost/iostreams/char_traits.hpp
+//
+// Dinkumware that comes with QNX Momentics 6.3.0, 4.0.2, incorrectly defines
+// the EOF and WEOF macros without std::-qualifying the wint_t type (and so does
+// Sun C++ 5.8 + STLport 4). Fix by importing std::wint_t into this scope, since WEOF is used below.
+// NOTE: Use BOOST_WORKAROUND?
+#if (defined(__QNX__) && defined(BOOST_DINKUMWARE_STDLIB)) \
+ || defined(__SUNPRO_CC)
+ using ::std::wint_t;
+#endif
+
+#include <boost/core/lightweight_test.hpp>
+
+// Paired test vectors, keyed by the size of wchar_t in bytes.
+// Only the declarations live here; each array is defined by an explicit
+// specialization (for 2 and for 4) further down in this file.
+template<std::size_t WideCharSize>
+struct test_data
+{
+    // Raw UTF-8 byte sequences.
+    static unsigned char utf8_encoding[];
+    // The corresponding characters as wchar_t values.
+    static wchar_t wchar_encoding[];
+};
+
+template<>
+unsigned char test_data<2>::utf8_encoding[] = { // UTF-8 input used when sizeof(wchar_t) == 2; pairs with test_data<2>::wchar_encoding
+ 0x01, // U+0001
+ 0x7f, // U+007F: largest 1-byte sequence
+ 0xc2, 0x80, // U+0080: smallest 2-byte sequence
+ 0xdf, 0xbf, // U+07FF: largest 2-byte sequence
+ 0xe0, 0xa0, 0x80, // U+0800: smallest 3-byte sequence
+ 0xe7, 0xbf, 0xbf // U+7FFF: a 3-byte sequence
+};
+
+template<>
+wchar_t test_data<2>::wchar_encoding[] = { // expected decode of test_data<2>::utf8_encoding, one entry per sequence
+ 0x0001, // from 01
+ 0x007f, // from 7f
+ 0x0080, // from c2 80
+ 0x07ff, // from df bf
+ 0x0800, // from e0 a0 80
+ 0x7fff // from e7 bf bf
+};
+
+template<>
+unsigned char test_data<4>::utf8_encoding[] = { // UTF-8 input used when sizeof(wchar_t) == 4; pairs with test_data<4>::wchar_encoding
+ 0x01, // U+0001
+ 0x7f, // U+007F: largest 1-byte sequence
+ 0xc2, 0x80, // U+0080: smallest 2-byte sequence
+ 0xdf, 0xbf, // U+07FF: largest 2-byte sequence
+ 0xe0, 0xa0, 0x80, // U+0800: smallest 3-byte sequence
+ 0xef, 0xbf, 0xbf, // U+FFFF: largest 3-byte sequence
+ 0xf0, 0x90, 0x80, 0x80, // U+10000: smallest 4-byte sequence
+ 0xf4, 0x8f, 0xbf, 0xbf, // U+10FFFF
+ /* codecvt implementations for clang and gcc don't handle more than 21 bits and
+ * return eof accordingly. So don't test the whole 32-bit range
+ */
+ /*
+ 0xf7, 0xbf, 0xbf, 0xbf,
+ 0xf8, 0x88, 0x80, 0x80, 0x80,
+ 0xfb, 0xbf, 0xbf, 0xbf, 0xbf,
+ 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80,
+ 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf
+ */
+};
+
+// Values expected from decoding test_data<4>::utf8_encoding, one entry per
+// UTF-8 sequence and in the same order.
+// Each constant is converted explicitly with static_cast, matching the named
+// casts used elsewhere in this file.
+// NOTE(review): presumably the explicit conversion is what keeps this table
+// compilable where wchar_t is narrower than 32 bits -- confirm.
+template<>
+wchar_t test_data<4>::wchar_encoding[] = {
+    static_cast<wchar_t>(0x00000001),
+    static_cast<wchar_t>(0x0000007f),
+    static_cast<wchar_t>(0x00000080),
+    static_cast<wchar_t>(0x000007ff),
+    static_cast<wchar_t>(0x00000800),
+    static_cast<wchar_t>(0x0000ffff),
+    static_cast<wchar_t>(0x00010000),
+    static_cast<wchar_t>(0x0010ffff),
+    /* codecvt implementations for clang and gcc don't handle more than 21 bits and
+     * return eof accordingly. So don't test the whole 32-bit range
+     */
+    /*
+    static_cast<wchar_t>(0x001fffff),
+    static_cast<wchar_t>(0x00200000),
+    static_cast<wchar_t>(0x03ffffff),
+    static_cast<wchar_t>(0x04000000),
+    static_cast<wchar_t>(0x7fffffff)
+    */
+};
+
+// Reads the whole of file_name through a wide input stream imbued with loc
+// and returns every character obtained. An unopenable file yields an empty
+// vector, so callers must check the size as well as the contents.
+static std::vector<wchar_t>
+read_wide_file(const char * file_name, const std::locale & loc){
+    std::vector<wchar_t> contents;
+    std::wifstream ifs;
+    // imbue before open, as the rest of this test does
+    ifs.imbue(loc);
+    ifs.open(file_name);
+    // note can't use normal vector from iterator constructor because
+    // dinkumware doesn't have it.
+    for(;;){
+        const std::wint_t item = ifs.get();
+        if(item == WEOF)
+            break;
+        contents.push_back(static_cast<wchar_t>(item));
+    }
+    return contents;
+}
+
+// Exercises boost::detail::utf8_codecvt_facet:
+//  1. writes the UTF-8 test vector to "test.dat", reads it back through a
+//     wide stream and checks it decodes to the expected wchar_t vector;
+//  2. writes the decoded characters to "test2.dat" and checks both files
+//     hold the same bytes;
+//  3. round-trips a longer string through "test3.dat";
+//  4. checks codecvt::length() on the full vector and on a prefix that ends
+//     in the middle of a character.
+// Failures are recorded through the BOOST_TEST macros; the function itself
+// always returns EXIT_SUCCESS. Both parameters are unused.
+int
+test_main(int /* argc */, char * /* argv */[]) {
+    std::locale utf8_locale
+        = std::locale(
+            std::locale::classic(),
+            new boost::detail::utf8_codecvt_facet
+        );
+
+    typedef char utf8_t;
+    // define test data compatible with the wchar_t implementation
+    // as either ucs-2 or ucs-4 depending on the compiler/library.
+    typedef test_data<sizeof(wchar_t)> td;
+    const std::size_t utf8_count
+        = sizeof(td::utf8_encoding) / sizeof(*td::utf8_encoding);
+    const std::size_t wchar_count
+        = sizeof(td::wchar_encoding) / sizeof(*td::wchar_encoding);
+
+    // Send our test UTF-8 data to file
+    {
+        std::ofstream ofs;
+        ofs.open("test.dat");
+        BOOST_TEST(ofs.is_open());
+        std::copy(
+            td::utf8_encoding,
+            td::utf8_encoding + utf8_count,
+            std::ostream_iterator<utf8_t>(ofs)
+        );
+    }
+
+    // Read the test data back in, converting to wide characters on the way in
+    const std::vector<wchar_t> from_file
+        = read_wide_file("test.dat", utf8_locale);
+
+    // Compare the lengths first: a one-range std::equal would pass on an
+    // empty read and would run off the end of wchar_encoding on a long one.
+    BOOST_TEST_EQ(from_file.size(), wchar_count);
+    BOOST_TEST(
+        from_file.size() == wchar_count
+        && std::equal(from_file.begin(), from_file.end(), td::wchar_encoding)
+    );
+
+    // Send the decoded data back out, converting to UTF-8
+    {
+        std::wofstream ofs;
+        ofs.imbue(utf8_locale);
+        ofs.open("test2.dat");
+        BOOST_TEST(ofs.is_open());
+        std::copy(
+            from_file.begin(),
+            from_file.end(),
+            std::ostream_iterator<wchar_t, wchar_t>(ofs)
+        );
+    }
+
+    // Make sure that both files are the same
+    {
+        typedef std::istream_iterator<utf8_t> is_iter;
+        is_iter end_iter;
+
+        std::ifstream ifs1("test.dat");
+        is_iter it1(ifs1);
+        std::vector<utf8_t> data1;
+        std::copy(it1, end_iter, std::back_inserter(data1));
+
+        std::ifstream ifs2("test2.dat");
+        is_iter it2(ifs2);
+        std::vector<utf8_t> data2;
+        std::copy(it2, end_iter, std::back_inserter(data2));
+
+        BOOST_TEST(data1 == data2);
+    }
+
+    // some libraries have trouble that only shows up with longer strings
+
+    const wchar_t * test3_data = L"\
+ <?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>\
+ <!DOCTYPE boost_serialization>\
+ <boost_serialization signature=\"serialization::archive\" version=\"3\">\
+ <a class_id=\"0\" tracking_level=\"0\">\
+ <b>1</b>\
+ <f>96953204</f>\
+ <g>177129195</g>\
+ <l>1</l>\
+ <m>5627</m>\
+ <n>23010</n>\
+ <o>7419</o>\
+ <p>16212</p>\
+ <q>4086</q>\
+ <r>2749</r>\
+ <c>-33</c>\
+ <s>124</s>\
+ <t>28</t>\
+ <u>32225</u>\
+ <v>17543</v>\
+ <w>0.84431422</w>\
+ <x>1.0170664757130923</x>\
+ <y>tjbx</y>\
+ <z>cuwjentqpkejp</z>\
+ </a>\
+ </boost_serialization>\
+ ";
+
+    // Write the long string out, converting to UTF-8
+    const std::size_t l = std::wcslen(test3_data);
+    {
+        std::wofstream ofs;
+        ofs.imbue(utf8_locale);
+        ofs.open("test3.dat");
+        BOOST_TEST(ofs.is_open());
+        std::copy(
+            test3_data,
+            test3_data + l,
+            std::ostream_iterator<wchar_t, wchar_t>(ofs)
+        );
+    }
+
+    // Read the file back and make sure it matches the in-memory string,
+    // checking the length so that a truncated or over-long file is caught.
+    {
+        const std::vector<wchar_t> round_trip
+            = read_wide_file("test3.dat", utf8_locale);
+        BOOST_TEST_EQ(round_trip.size(), l);
+        BOOST_TEST(
+            round_trip.size() == l
+            && std::equal(round_trip.begin(), round_trip.end(), test3_data)
+        );
+    }
+
+    typedef std::codecvt<wchar_t, char, std::mbstate_t> codecvt_type;
+    const char * const utf8_begin
+        = reinterpret_cast< const char* >(td::utf8_encoding);
+    // largest std::size_t value, passed as the "max" argument of length()
+    const std::size_t no_limit = ~static_cast< std::size_t >(0u);
+
+    // Test length calculation
+    {
+        codecvt_type const& fac = std::use_facet< codecvt_type >(utf8_locale);
+        std::mbstate_t mbs = std::mbstate_t();
+        const int utf8_len = static_cast< int >(utf8_count);
+        int res = fac.length(mbs, utf8_begin, utf8_begin + utf8_len, no_limit);
+        BOOST_TEST_EQ(utf8_len, res);
+    }
+
+    // Test that length calculation detects character boundaries
+    {
+        codecvt_type const& fac = std::use_facet< codecvt_type >(utf8_locale);
+        std::mbstate_t mbs = std::mbstate_t();
+        // The first 5 bytes of utf8_encoding contain 3 complete UTF-8 characters (taking 4 bytes in total) and 1 byte of an incomplete character.
+        // This last byte should not be accounted by length().
+        const int input_len = 5;
+        const int utf8_len = 4;
+        int res = fac.length(mbs, utf8_begin, utf8_begin + input_len, no_limit);
+        BOOST_TEST_EQ(utf8_len, res);
+    }
+
+    return EXIT_SUCCESS;
+}
+
+// Program entry point: runs test_main, reports any exception that escapes it
+// as a test error, and returns the number of recorded errors when there are
+// any, otherwise whatever test_main returned.
+int
+main(int argc, char * argv[]){
+
+    // stays 1 if test_main exits by throwing
+    int status = 1;
+    BOOST_TRY{
+        status = test_main(argc, argv);
+    }
+    #ifndef BOOST_NO_EXCEPTION_STD_NAMESPACE
+    BOOST_CATCH(const std::exception & ex){
+        BOOST_ERROR(ex.what());
+    }
+    #endif
+    BOOST_CATCH(...){
+        BOOST_ERROR("failed with uncaught exception:");
+    }
+    BOOST_CATCH_END
+
+    const int failures = boost::report_errors();
+    return failures > 0 ? failures : status;
+}
+