// Source path: src/boost/libs/program_options/test/test_convert.cpp
// Copyright Vladimir Prus 2002-2004.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <cstring>
#include <cassert>
#include <string>
#include <fstream>
#include <sstream>
#include <iostream>
#include <stdexcept>
#include <boost/progress.hpp>
#include <boost/bind.hpp>
#include <boost/ref.hpp>

#include <boost/program_options/detail/convert.hpp>
#include <boost/program_options/detail/utf8_codecvt_facet.hpp>

#include "minitest.hpp"

using namespace std;

// Reads the entire file named 'filename' and returns its contents.
//
// Throws std::runtime_error when the file cannot be opened. The check is a
// real runtime test rather than an assert: an assert is compiled out under
// NDEBUG, in which case a missing file would silently yield an empty string
// and any comparison between two missing files would pass vacuously.
string file_content(const string& filename)
{
    ifstream ifs(filename.c_str());
    if (!ifs)
        throw std::runtime_error("cannot open file: " + filename);

    // Stream the whole file buffer into memory in one go.
    stringstream ss;
    ss << ifs.rdbuf();

    return ss.str();
}

// Variant of from_8_bit that drives the codecvt facet directly instead of
// going through a function object; kept for performance comparison.
// Converts the narrow string 's' to a wide string using 'cvt' and throws
// logic_error when the facet reports an error or produces no output for the
// remaining input.
std::wstring from_8_bit_2(const std::string& s, 
                          const codecvt<wchar_t, char, mbstate_t>& cvt)
{
    // std::basic_string does not provide a non-const pointer to its data,
    // so each piece is converted into this scratch buffer and then appended.
    const std::size_t chunk_size = 32;
    wchar_t chunk[chunk_size];

    std::wstring converted;
    std::mbstate_t shift_state = std::mbstate_t();

    const char* cursor = s.data();
    const char* const input_end = cursor + s.size();

    // The interface of cvt is not really iterator-like, and it is not
    // possible to tell the required output size without doing the
    // conversion. All we can do is convert the data piece by piece.
    while (cursor != input_end) {
        const char* consumed_to = cursor;
        wchar_t* produced_to = chunk;

        // Try to convert whatever input is still left.
        const std::codecvt_base::result status = cvt.in(
            shift_state, cursor, input_end, consumed_to,
            chunk, chunk + chunk_size, produced_to);

        // 'partial' on its own is not an error; it only means that not all
        // source characters were converted. A pass that yields no target
        // character at all, however, means the source data is incomplete,
        // and since there is no further data to supply, that is treated the
        // same way as an explicit conversion error.
        if (status == std::codecvt_base::error || produced_to == chunk)
            throw logic_error("character conversion failed");

        // Keep the converted characters and resume after the consumed input.
        converted.append(chunk, static_cast<std::size_t>(produced_to - chunk));
        cursor = consumed_to;
    }

    return converted;
}


// Converts 'input' to a wide string with the UTF-8 codecvt facet, once via
// boost::from_8_bit and once via from_8_bit_2, and verifies the result.
//
// 'expected_output' is read as a sequence of byte pairs, one pair per wide
// character: byte 2*i is compared with the low 8 bits of output[i] and byte
// 2*i+1 with bits 8..15. Finally the wide string is converted back with
// boost::to_8_bit and must equal 'input'.
//
// Each conversion is repeated 10000 times inside a scope holding a
// boost::progress_timer so the two implementations can be compared.
void test_convert(const std::string& input, 
                  const std::string& expected_output)
{
    boost::program_options::detail::utf8_codecvt_facet facet;
    
    std::wstring output;
    { 
        boost::progress_timer t;
        for (int i = 0; i < 10000; ++i)
            output = boost::from_8_bit(
                input,
                facet);
    }

    {
        boost::progress_timer t;
        for (int i = 0; i < 10000; ++i)
            output = from_8_bit_2(
                input,
                facet);
    }

    BOOST_CHECK(output.size()*2 == expected_output.size());

    // Only compare positions that exist on both sides, so that a size
    // mismatch (already reported above) cannot lead to reading past the end
    // of 'expected_output'.
    const std::string::size_type expected_pairs = expected_output.size() / 2;
    const std::string::size_type comparable =
        output.size() < expected_pairs ? output.size() : expected_pairs;

    for(std::string::size_type i = 0; i < comparable; ++i) {

        {
            unsigned low = output[i];
            low &= 0xFF;
            unsigned low2 = expected_output[2*i];
            low2 &= 0xFF;
            BOOST_CHECK(low == low2);
        }
        {        
            unsigned high = output[i];
            high >>= 8;
            high &= 0xFF;
            unsigned high2 = expected_output[2*i+1];
            // Mask like the low byte: where plain char is signed, a byte
            // >= 0x80 sign-extends when widened to unsigned and would never
            // equal the masked 'high'.
            high2 &= 0xFF;
            BOOST_CHECK(high == high2);
        }
    }

    string ref = boost::to_8_bit(output, facet);

    BOOST_CHECK(ref == input);
}

// Test driver. Loads the fixtures "utf8.txt" and "ucs2.txt" and runs
// test_convert on them. When a command line argument is given, it is also
// converted with boost::from_local_8_bit under the user's locale and the
// resulting values are printed in decimal, one per line.
int main(int ac, char* av[])
{
    const std::string utf8_text = file_content("utf8.txt");
    const std::string ucs2_bytes = file_content("ucs2.txt");

    test_convert(utf8_text, ucs2_bytes);

    // Nothing more to do without an extra argument.
    if (ac <= 1)
        return 0;

    std::cout << "Trying to convert the command line argument\n";

    // Switch the global locale to the environment's one before converting.
    std::locale::global(std::locale(""));
    const std::wstring wide = boost::from_local_8_bit(av[1]);

    std::cout << "Got something, printing decimal code point values\n";
    for (std::wstring::const_iterator it = wide.begin();
         it != wide.end(); ++it) {
        std::cout << static_cast<unsigned>(*it) << "\n";
    }

    return 0;
}