summaryrefslogtreecommitdiffstats
path: root/src/lib-compression/ostream-bzlib.c
blob: 4d072b2bc37a22dc501f8b1579fc9df9dd4ecc83 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
/* Copyright (c) 2010-2018 Dovecot authors, see the included COPYING file */

#include "lib.h"

#ifdef HAVE_BZLIB

#include "ostream-private.h"
#include "ostream-zlib.h"
#include <bzlib.h>

#define CHUNK_SIZE (1024*64)

struct bzlib_ostream {
	struct ostream_private ostream;
	bz_stream zs;

	char outbuf[CHUNK_SIZE];
	unsigned int outbuf_offset, outbuf_used;

	bool flushed:1;
};

/* in bzlib, level is actually block size. From bzlib manual:

   The block size affects both the compression ratio achieved,
   and the amount of memory needed for compression and decompression.

   BlockSize 1 through BlockSize 9 specify the block size to be 100,000 bytes
   through 900,000 bytes respectively. The default is to use the maximum block
   size.

   Larger block sizes give rapidly diminishing marginal returns.
   Most of the compression comes from the first two or three hundred k of
   block size, a fact worth bearing in mind when using bzip2 on small machines.
   It is also important to appreciate that the decompression memory
   requirement is set at compression time by the choice of block size.

   * In general, try and use the largest block size memory constraints
     allow, since that maximises the compression achieved.
   * Compression and decompression speed are virtually unaffected by block
     size.

   Another significant point applies to files which fit in a single block -
   that means most files you'd encounter using a large block size. The
   amount of real memory touched is proportional to the size of the file,
   since the file is smaller than a block. For example, compressing a file
   20,000 bytes long with the flag BlockSize 9 will cause the compressor to
   allocate around 7600k of memory, but only touch 400k + 20000 * 8 = 560 kbytes
   of it. Similarly, the decompressor will allocate 3700k but only
   touch 100k + 20000 * 4 = 180 kbytes.
*/

int compression_get_min_level_bz2(void)
{
	return 1;
}

int compression_get_default_level_bz2(void)
{
	/* default is maximum level */
	return 9;
}

int compression_get_max_level_bz2(void)
{
	return 9;
}

static void o_stream_bzlib_close(struct iostream_private *stream,
				 bool close_parent)
{
	struct bzlib_ostream *zstream = (struct bzlib_ostream *)stream;

	(void)BZ2_bzCompressEnd(&zstream->zs);
	if (close_parent)
		o_stream_close(zstream->ostream.parent);
}

static int o_stream_zlib_send_outbuf(struct bzlib_ostream *zstream)
{
	ssize_t ret;
	size_t size;

	if (zstream->outbuf_used == 0)
		return 1;

	size = zstream->outbuf_used - zstream->outbuf_offset;
	i_assert(size > 0);
	ret = o_stream_send(zstream->ostream.parent,
			    zstream->outbuf + zstream->outbuf_offset, size);
	if (ret < 0) {
		o_stream_copy_error_from_parent(&zstream->ostream);
		return -1;
	}
	if ((size_t)ret != size) {
		zstream->outbuf_offset += ret;
		return 0;
	}
	zstream->outbuf_offset = 0;
	zstream->outbuf_used = 0;
	return 1;
}

static ssize_t
o_stream_bzlib_send_chunk(struct bzlib_ostream *zstream,
			  const void *data, size_t size)
{
	bz_stream *zs = &zstream->zs;
	int ret;

	i_assert(zstream->outbuf_used == 0);

	zs->next_in = (void *)data;
	zs->avail_in = size;
	while (zs->avail_in > 0) {
		if (zs->avail_out == 0) {
			/* previous block was compressed. send it and start
			   compression for a new block. */
			zs->next_out = zstream->outbuf;
			zs->avail_out = sizeof(zstream->outbuf);

			zstream->outbuf_used = sizeof(zstream->outbuf);
			if ((ret = o_stream_zlib_send_outbuf(zstream)) < 0)
				return -1;
			if (ret == 0) {
				/* parent stream's buffer full */
				break;
			}
		}

		switch ((ret = BZ2_bzCompress(zs, BZ_RUN))) {
		case BZ_RUN_OK:
			break;
		case BZ_MEM_ERROR:
			i_fatal_status(FATAL_OUTOFMEM, "bzip2.write(%s): Out of memory",
				       o_stream_get_name(&zstream->ostream.ostream));
		default:
			i_fatal("BZ2_bzCompress() failed with %d", ret);
		}
	}
	size -= zs->avail_in;

	zstream->flushed = FALSE;
	return size;
}

static int o_stream_bzlib_send_flush(struct bzlib_ostream *zstream, bool final)
{
	bz_stream *zs = &zstream->zs;
	size_t len;
	bool done = FALSE;
	int ret;

	i_assert(zs->avail_in == 0);

	if (zstream->flushed) {
		i_assert(zstream->outbuf_used == 0);
		return 1;
	}

	if ((ret = o_stream_flush_parent_if_needed(&zstream->ostream)) <= 0)
		return ret;
	if ((ret = o_stream_zlib_send_outbuf(zstream)) <= 0)
		return ret;

	/* do not attempt to finish the stream early */
	if (!final)
		return 1;

	i_assert(zstream->outbuf_used == 0);
	do {
		len = sizeof(zstream->outbuf) - zs->avail_out;
		if (len != 0) {
			zs->next_out = zstream->outbuf;
			zs->avail_out = sizeof(zstream->outbuf);

			zstream->outbuf_used = len;
			if ((ret = o_stream_zlib_send_outbuf(zstream)) <= 0)
				return ret;
			if (done)
				break;
		}

		ret = BZ2_bzCompress(zs, BZ_FINISH);
		switch (ret) {
		case BZ_RUN_OK:
		case BZ_FLUSH_OK:
		case BZ_STREAM_END:
			done = TRUE;
			break;
		case BZ_FINISH_OK:
			break;
                case BZ_MEM_ERROR:
                        i_fatal_status(FATAL_OUTOFMEM, "bzip2.write(%s): Out of memory",
                                       o_stream_get_name(&zstream->ostream.ostream));
                default:
                        i_fatal("BZ2_bzCompress() failed with %d", ret);
		}
	} while (zs->avail_out != sizeof(zstream->outbuf));

	if (final)
		zstream->flushed = TRUE;
	i_assert(zstream->outbuf_used == 0);
	return 1;
}

static int o_stream_bzlib_flush(struct ostream_private *stream)
{
	struct bzlib_ostream *zstream = (struct bzlib_ostream *)stream;
	int ret;
	if ((ret = o_stream_bzlib_send_flush(zstream, stream->finished)) < 0)
		return -1;
	else if (ret > 0)
		return o_stream_flush_parent(stream);
	return ret;
}

static size_t
o_stream_bzlib_get_buffer_used_size(const struct ostream_private *stream)
{
	const struct bzlib_ostream *zstream =
		(const struct bzlib_ostream *)stream;

	/* outbuf has already compressed data that we're trying to send to the
	   parent stream. We're not including bzlib's internal compression
	   buffer size. */
	return (zstream->outbuf_used - zstream->outbuf_offset) +
		o_stream_get_buffer_used_size(stream->parent);
}

static size_t
o_stream_bzlib_get_buffer_avail_size(const struct ostream_private *stream)
{
	/* FIXME: not correct - this is counting compressed size, which may be
	   too larger than uncompressed size in some situations. Fixing would
	   require some kind of additional buffering. */
	return o_stream_get_buffer_avail_size(stream->parent);
}

static ssize_t
o_stream_bzlib_sendv(struct ostream_private *stream,
		    const struct const_iovec *iov, unsigned int iov_count)
{
	struct bzlib_ostream *zstream = (struct bzlib_ostream *)stream;
	ssize_t ret, bytes = 0;
	unsigned int i;

	if ((ret = o_stream_zlib_send_outbuf(zstream)) <= 0) {
		/* error / we still couldn't flush existing data to
		   parent stream. */
		return ret;
	}

	for (i = 0; i < iov_count; i++) {
		ret = o_stream_bzlib_send_chunk(zstream, iov[i].iov_base,
						iov[i].iov_len);
		if (ret < 0)
			return -1;
		bytes += ret;
		if ((size_t)ret != iov[i].iov_len)
			break;
	}
	stream->ostream.offset += bytes;

	/* avail_in!=0 check is used to detect errors. if it's non-zero here
	   it simply means we didn't send all the data */
	zstream->zs.avail_in = 0;
	return bytes;
}

struct ostream *o_stream_create_bz2(struct ostream *output, int level)
{
	struct bzlib_ostream *zstream;
	int ret;

	i_assert(level >= 1 && level <= 9);

	zstream = i_new(struct bzlib_ostream, 1);
	zstream->ostream.sendv = o_stream_bzlib_sendv;
	zstream->ostream.flush = o_stream_bzlib_flush;
	zstream->ostream.get_buffer_used_size =
		o_stream_bzlib_get_buffer_used_size;
	zstream->ostream.get_buffer_avail_size =
		o_stream_bzlib_get_buffer_avail_size;
	zstream->ostream.iostream.close = o_stream_bzlib_close;

	ret = BZ2_bzCompressInit(&zstream->zs, level, 0, 0);
	switch (ret) {
	case BZ_OK:
		break;
	case BZ_MEM_ERROR:
		i_fatal_status(FATAL_OUTOFMEM,
			       "bzlib: Out of memory");
	case BZ_CONFIG_ERROR:
		i_fatal("Wrong bzlib library version (broken compilation)");
	case BZ_PARAM_ERROR:
		i_fatal("bzlib: Invalid parameters");
	default:
		i_fatal("BZ2_bzCompressInit() failed with %d", ret);
	}

	zstream->zs.next_out = zstream->outbuf;
	zstream->zs.avail_out = sizeof(zstream->outbuf);
	return o_stream_create(&zstream->ostream, output,
			       o_stream_get_fd(output));
}
#endif