src/boost/libs/iostreams/test/gzip_test.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246

// (C) Copyright 2008 CodeRage, LLC (turkanis at coderage dot com)
// (C) Copyright 2004-2007 Jonathan Turkanis
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt.)

// See http://www.boost.org/libs/iostreams for documentation.

#include <cstddef>
#include <string>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/device/array.hpp>
#include <boost/iostreams/device/back_inserter.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filter/test.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/ref.hpp>
#include <boost/range/iterator_range.hpp>
#include <boost/test/test_tools.hpp>
#include <boost/test/unit_test.hpp>
#include "detail/sequence.hpp"
#include "detail/verification.hpp"

using namespace boost;
using namespace boost::iostreams;
using namespace boost::iostreams::test;
namespace io = boost::iostreams;
using boost::unit_test::test_suite;     

// Custom allocator type used to instantiate the basic_gzip_* filter templates
// with an allocator argument of our own. All allocation behaviour is inherited
// from std::allocator<char>; only the constructors below are supplied.
struct gzip_alloc : std::allocator<char> {
    // Default construction.
    gzip_alloc() : std::allocator<char>() { }

    // Copy construction. The base is default-constructed; the argument is
    // not consulted.
    gzip_alloc(const gzip_alloc&) : std::allocator<char>() { }

    // Converting construction from a standard allocator of any value type.
    // The argument is not consulted.
    template<typename T>
    gzip_alloc(const std::allocator<T>&) : std::allocator<char>() { }
};

void compression_test()
{
    text_sequence      data;

    // Test compression and decompression with metadata
    for (int i = 0; i < 4; ++i) {
        gzip_params params;
        if (i & 1) {
            params.file_name = "original file name";
        }
        if (i & 2) {
            params.comment = "detailed file description";
        }
        gzip_compressor    out(params);
        gzip_decompressor  in;
        BOOST_CHECK(
            test_filter_pair( boost::ref(out), 
                              boost::ref(in), 
                              std::string(data.begin(), data.end()) )
        );
        BOOST_CHECK(in.file_name() == params.file_name);
        BOOST_CHECK(in.comment() == params.comment);
    }

    // Test compression and decompression with custom allocator
    BOOST_CHECK(
        test_filter_pair( basic_gzip_compressor<gzip_alloc>(), 
                          basic_gzip_decompressor<gzip_alloc>(), 
                          std::string(data.begin(), data.end()) )
    );
}

// Writes the text sample through one gzip_compressor twice in succession into
// the same buffer, then verifies that decompressing that buffer yields two
// copies of the sample, using two different ways of wiring the decompressor.
void multiple_member_test()
{
    text_sequence      data;
    std::vector<char>  temp, dest;

    // Write compressed data to temp, twice in succession. The compressor is
    // pushed once; each pass attaches a back_inserter on `temp` and copies
    // the sample through the chain.
    filtering_ostream out;
    out.push(gzip_compressor());
    for (int pass = 0; pass < 2; ++pass) {
        out.push(io::back_inserter(temp));
        io::copy(make_iterator_range(data), out);
    }

    // Variant 1: decompressor placed on the input side (filtering_istream).
    // Read compressed data from temp into dest.
    filtering_istream in;
    in.push(gzip_decompressor());
    in.push(array_source(&temp[0], temp.size()));
    io::copy(in, io::back_inserter(dest));

    // Check that dest consists of two copies of data
    BOOST_REQUIRE_EQUAL(data.size() * 2, dest.size());
    {
        const std::vector<char>::iterator second_half =
            dest.begin() + dest.size() / 2;
        BOOST_CHECK(std::equal(data.begin(), data.end(), dest.begin()));
        BOOST_CHECK(std::equal(data.begin(), data.end(), second_half));
    }

    // Variant 2: decompressor composed with the output sink.
    dest.clear();
    io::copy(
        array_source(&temp[0], temp.size()),
        io::compose(gzip_decompressor(), io::back_inserter(dest)));

    // Check that dest consists of two copies of data
    BOOST_REQUIRE_EQUAL(data.size() * 2, dest.size());
    {
        const std::vector<char>::iterator second_half =
            dest.begin() + dest.size() / 2;
        BOOST_CHECK(std::equal(data.begin(), data.end(), dest.begin()));
        BOOST_CHECK(std::equal(data.begin(), data.end(), second_half));
    }
}

// Compresses a short string into a std::string buffer, then decompresses it
// by composing a gzip_decompressor with an array_source over that buffer and
// checks that the original text comes back.
void array_source_test()
{
    std::string plain = "simple test string.";
    std::string compressed;

    // Compress `plain` into `compressed`.
    filtering_ostream out;
    out.push(gzip_compressor());
    out.push(io::back_inserter(compressed));
    io::copy(make_iterator_range(plain), out);

    // Decompress straight from the compressed bytes.
    std::string restored;
    io::array_source src(compressed.data(), compressed.size());
    io::copy(io::compose(io::gzip_decompressor(), src),
             io::back_inserter(restored));

    BOOST_CHECK_EQUAL(plain, restored);
}

#if defined(BOOST_MSVC)
# pragma warning(push)
# pragma warning(disable:4309)  // Truncation of constant value
#endif

// Feeds a hand-built gzip header, one byte at a time, to the header parser
// and checks both when the parser reports completion and which field values
// it extracted.
void header_test()
{
    // This test is in response to https://svn.boost.org/trac/boost/ticket/5908
    // which describes a problem parsing gzip headers with extra fields as
    // defined in RFC 1952 (http://www.ietf.org/rfc/rfc1952.txt).
    // The extra field data used here is characteristic of the tabix file
    // format (http://samtools.sourceforge.net/tabix.shtml).
    const char header_bytes[] = {
        static_cast<char>(gzip::magic::id1),
        static_cast<char>(gzip::magic::id2),
        gzip::method::deflate, // Compression Method: deflate
        gzip::flags::extra | gzip::flags::name | gzip::flags::comment, // flags
        '\x22', '\x9c', '\xf3', '\x4e', // 4 byte modification time (little endian)
        gzip::extra_flags::best_compression, // XFL
        gzip::os_unix, // OS
        6, 0, // 2 byte length of extra field (little endian, 6 bytes)
        'B', 'C', 2, 0, 0, 0, // 6 bytes worth of extra field data
        'a', 'b', 'c', 0, // original filename, null terminated
        'n', 'o', ' ', 'c', 'o', 'm', 'm', 'e', 'n', 't', 0, // comment
    };
    const std::size_t byte_count = sizeof(header_bytes) / sizeof(header_bytes[0]);
    const std::size_t last_index = byte_count - 1;

    boost::iostreams::detail::gzip_header hdr;
    for (std::size_t pos = 0; pos != byte_count; ++pos) {
        hdr.process(header_bytes[pos]);

        // Require that we are done at the last byte, not before.
        if (pos != last_index) {
            BOOST_REQUIRE(!hdr.done());
        } else {
            BOOST_REQUIRE(hdr.done());
        }
    }

    // The parsed fields must match what was encoded in header_bytes above.
    BOOST_CHECK_EQUAL("abc", hdr.file_name());
    BOOST_CHECK_EQUAL("no comment", hdr.comment());
    BOOST_CHECK_EQUAL(0x4ef39c22, hdr.mtime());
    BOOST_CHECK_EQUAL(gzip::os_unix, hdr.os());
}

#if defined(BOOST_MSVC)
# pragma warning(pop)
#endif

// Round-trips an empty input through a gzip compressor/decompressor pair.
void empty_file_test()
{
    // This test is in response to https://svn.boost.org/trac/boost/ticket/5237
    // The previous implementation of gzip_compressor only wrote the gzip file
    // header when the first bytes of uncompressed input were processed, causing
    // incorrect behavior for empty files
    const std::string no_input;
    const bool round_trip_ok =
        test_filter_pair(gzip_compressor(), gzip_decompressor(), no_input);
    BOOST_CHECK(round_trip_ok);
}

// Decompresses an embedded multi-part gzip byte sequence line by line and
// compares each line read against the expected text.
void multipart_test()
{
    // This test verifies that the gzip_decompressor properly handles a file
    // that was written in multiple parts using Z_FULL_FLUSH, and in particular
    // handles the CRC properly when one of those parts is empty.
    const char multipart_file[] = {
        '\x1f', '\x8b', '\x08', '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\xff', '\xf2', '\xc9',
        '\xcc', '\x4b', '\x55', '\x30', '\xe4', '\xf2', '\x01', '\x51', '\x46', '\x10', '\xca', '\x98',
        '\x0b', '\x00', '\x00', '\x00', '\xff', '\xff', '\x03', '\x00', '\xdb', '\xa7', '\x83', '\xc9',
        '\x15', '\x00', '\x00', '\x00', '\x1f', '\x8b', '\x08', '\x00', '\x00', '\x00', '\x00', '\x00',
        '\x02', '\xff', '\xf2', '\xc9', '\xcc', '\x4b', '\x55', '\x30', '\xe1', '\xf2', '\x01', '\x51',
        '\xa6', '\x10', '\xca', '\x8c', '\x0b', '\x00', '\x00', '\x00', '\xff', '\xff', '\x03', '\x00',
        '\x41', '\xe3', '\xcc', '\xaa', '\x15', '\x00', '\x00', '\x00', '\x1f', '\x8b', '\x08', '\x00',
        '\x00', '\x00', '\x00', '\x00', '\x02', '\xff', '\x02', '\x00', '\x00', '\x00', '\xff', '\xff',
        '\x03', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x1f', '\x8b',
        '\x08', '\x00', '\x00', '\x00', '\x00', '\x00', '\x02', '\xff', '\xf2', '\xc9', '\xcc', '\x4b',
        '\x55', '\x30', '\xe7', '\xf2', '\x01', '\x51', '\x16', '\x10', '\xca', '\x92', '\x0b', '\x00',
        '\x00', '\x00', '\xff', '\xff', '\x03', '\x00', '\x2b', '\xac', '\xd3', '\xf5', '\x15', '\x00',
        '\x00', '\x00'
    };

    // Lines expected from the decompressed stream, in reading order.
    const char* const expected_lines[] = {
        // First part
        "Line 1", "Line 2", "Line 3",
        // Second part immediately follows
        "Line 4", "Line 5", "Line 6",
        // Then an empty part, followed by one last 3-line part.
        "Line 7", "Line 8", "Line 9"
    };
    const std::size_t expected_count =
        sizeof(expected_lines) / sizeof(expected_lines[0]);

    filtering_istream in;
    std::string line;

    in.push(gzip_decompressor());
    in.push(io::array_source(multipart_file, sizeof(multipart_file)));

    // Read one line per expected entry and compare it.
    for (std::size_t k = 0; k != expected_count; ++k) {
        std::getline(in, line);
        BOOST_CHECK_EQUAL(expected_lines[k], line);
    }

    // Check for gzip errors too.
    BOOST_CHECK(!in.bad());
}

// Boost.Test entry point: builds the "gzip test" suite and registers every
// test function defined in this file, in the order they appear.
test_suite* init_unit_test_suite(int, char* [])
{
    test_suite* const suite = BOOST_TEST_SUITE("gzip test");

    // Round-trip tests.
    suite->add(BOOST_TEST_CASE(&compression_test));
    suite->add(BOOST_TEST_CASE(&multiple_member_test));
    suite->add(BOOST_TEST_CASE(&array_source_test));

    // Regression tests for specific header / empty-input / multi-part cases.
    suite->add(BOOST_TEST_CASE(&header_test));
    suite->add(BOOST_TEST_CASE(&empty_file_test));
    suite->add(BOOST_TEST_CASE(&multipart_test));

    return suite;
}