diff options
Diffstat (limited to 'src/lib-compression/istream-zlib.c')
-rw-r--r-- | src/lib-compression/istream-zlib.c | 431 |
1 files changed, 431 insertions, 0 deletions
diff --git a/src/lib-compression/istream-zlib.c b/src/lib-compression/istream-zlib.c new file mode 100644 index 0000000..3a975c3 --- /dev/null +++ b/src/lib-compression/istream-zlib.c @@ -0,0 +1,431 @@ +/* Copyright (c) 2010-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" + +#ifdef HAVE_ZLIB + +#include "crc32.h" +#include "istream-private.h" +#include "istream-zlib.h" +#include <zlib.h> + +#define CHUNK_SIZE (1024*64) + +#define GZ_HEADER_MIN_SIZE 10 +#define GZ_TRAILER_SIZE 8 + +#define GZ_MAGIC1 0x1f +#define GZ_MAGIC2 0x8b +#define GZ_FLAG_FHCRC 0x02 +#define GZ_FLAG_FEXTRA 0x04 +#define GZ_FLAG_FNAME 0x08 +#define GZ_FLAG_FCOMMENT 0x10 + +struct zlib_istream { + struct istream_private istream; + + z_stream zs; + uoff_t eof_offset; + size_t prev_size; + uint32_t crc32; + struct stat last_parent_statbuf; + + bool gz:1; + bool marked:1; + bool header_read:1; + bool trailer_read:1; + bool zs_closed:1; + bool starting_concated_output:1; +}; + +static void i_stream_zlib_init(struct zlib_istream *zstream); + +static void i_stream_zlib_close(struct iostream_private *stream, + bool close_parent) +{ + struct zlib_istream *zstream = (struct zlib_istream *)stream; + + if (!zstream->zs_closed) { + (void)inflateEnd(&zstream->zs); + zstream->zs_closed = TRUE; + } + if (close_parent) + i_stream_close(zstream->istream.parent); +} + +static void zlib_read_error(struct zlib_istream *zstream, const char *error) +{ + io_stream_set_error(&zstream->istream.iostream, + "zlib.read(%s): %s at %"PRIuUOFF_T, + i_stream_get_name(&zstream->istream.istream), error, + i_stream_get_absolute_offset(&zstream->istream.istream)); +} + +static int i_stream_zlib_read_header(struct istream_private *stream) +{ + struct zlib_istream *zstream = (struct zlib_istream *)stream; + const unsigned char *data; + size_t size; + unsigned int pos, fextra_size; + int ret; + + ret = i_stream_read_bytes(stream->parent, &data, &size, + zstream->prev_size + 1); + if (size == zstream->prev_size) { + stream->istream.stream_errno = stream->parent->stream_errno; + if (ret == -1 && stream->istream.stream_errno == 0) { + zlib_read_error(zstream, "missing gz header"); + stream->istream.stream_errno = EINVAL; + } + if (ret == -2) { + zlib_read_error(zstream, "gz header is too large"); + stream->istream.stream_errno = EINVAL; + ret = -1; + } + return ret; + } + zstream->prev_size = size; + + if (size < GZ_HEADER_MIN_SIZE) + return 0; + pos = GZ_HEADER_MIN_SIZE; + + if (data[0] != GZ_MAGIC1 || data[1] != GZ_MAGIC2) { + /* missing gzip magic header */ + zlib_read_error(zstream, "wrong magic in header (not gz file?)"); + stream->istream.stream_errno = EINVAL; + return -1; + } + if ((data[3] & GZ_FLAG_FEXTRA) != 0) { + if (pos + 2 > size) + return 0; + + fextra_size = le16_to_cpu_unaligned(&data[pos]); + pos += 2; + if (pos + fextra_size > size) + return 0; + pos += fextra_size; + } + if ((data[3] & GZ_FLAG_FNAME) != 0) { + do { + if (pos == size) + return 0; + } while (data[pos++] != '\0'); + } + if ((data[3] & GZ_FLAG_FCOMMENT) != 0) { + do { + if (pos == size) + return 0; + } while (data[pos++] != '\0'); + } + if ((data[3] & GZ_FLAG_FHCRC) != 0) { + if (pos + 2 > size) + return 0; + pos += 2; + } + i_stream_skip(stream->parent, pos); + zstream->prev_size = 0; + return 1; +} + +static int i_stream_zlib_read_trailer(struct zlib_istream *zstream) +{ + struct istream_private *stream = &zstream->istream; + const unsigned char *data; + size_t size; + int ret; + + ret = i_stream_read_bytes(stream->parent, &data, &size, + GZ_TRAILER_SIZE); + if (size == zstream->prev_size) { + stream->istream.stream_errno = stream->parent->stream_errno; + if (ret == -1 && stream->istream.stream_errno == 0) { + zlib_read_error(zstream, "missing gz trailer"); + stream->istream.stream_errno = EINVAL; + } + return ret; + } + zstream->prev_size = size; + + if (size < GZ_TRAILER_SIZE) + return 0; + + if (le32_to_cpu_unaligned(data) != zstream->crc32) { + zlib_read_error(zstream, "gz trailer has wrong CRC value"); + stream->istream.stream_errno = EINVAL; + return -1; + } + i_stream_skip(stream->parent, GZ_TRAILER_SIZE); + zstream->prev_size = 0; + zstream->trailer_read = TRUE; + return 1; +} + +static ssize_t i_stream_zlib_read(struct istream_private *stream) +{ + struct zlib_istream *zstream = (struct zlib_istream *)stream; + const unsigned char *data; + uoff_t high_offset; + size_t size, out_size; + int ret; + + high_offset = stream->istream.v_offset + (stream->pos - stream->skip); + if (zstream->eof_offset == high_offset) { + /* zlib library returned EOF. */ + if (!zstream->gz) { + /* deflate - ignore if there's still more data */ + stream->istream.eof = TRUE; + return -1; + } + /* gz format - read the trailer */ + if (!zstream->trailer_read) { + do { + ret = i_stream_zlib_read_trailer(zstream); + } while (ret == 0 && stream->istream.blocking); + if (ret <= 0) + return ret; + } + /* See if there's another concatenated gz stream. */ + if (i_stream_read_eof(stream->parent)) { + /* EOF or error */ + stream->istream.stream_errno = + stream->parent->stream_errno; + stream->istream.eof = TRUE; + return -1; + } + /* Multiple gz streams concatenated together */ + zstream->starting_concated_output = TRUE; + } + if (zstream->starting_concated_output) { + /* make sure there actually is something in parent stream. + we don't want to reset the stream unless we actually see + some concated output. */ + ret = i_stream_read_more(stream->parent, &data, &size); + if (ret <= 0) { + if (ret == 0) + return 0; + if (stream->parent->stream_errno != 0) { + stream->istream.stream_errno = + stream->parent->stream_errno; + } + stream->istream.eof = TRUE; + return -1; + } + + /* gzip file with concatenated content */ + stream->cached_stream_size = UOFF_T_MAX; + zstream->eof_offset = UOFF_T_MAX; + zstream->header_read = FALSE; + zstream->trailer_read = FALSE; + zstream->crc32 = 0; + zstream->starting_concated_output = FALSE; + + (void)inflateEnd(&zstream->zs); + i_stream_zlib_init(zstream); + } + + if (!zstream->header_read) { + do { + ret = i_stream_zlib_read_header(stream); + } while (ret == 0 && stream->istream.blocking); + if (ret <= 0) + return ret; + zstream->header_read = TRUE; + } + + if (!zstream->marked) { + if (!i_stream_try_alloc(stream, CHUNK_SIZE, &out_size)) + return -2; /* buffer full */ + } else { + /* try to avoid compressing, so we can quickly seek backwards */ + if (!i_stream_try_alloc_avoid_compress(stream, CHUNK_SIZE, &out_size)) + return -2; /* buffer full */ + } + + if (i_stream_read_more(stream->parent, &data, &size) < 0) { + if (stream->parent->stream_errno != 0) { + stream->istream.stream_errno = + stream->parent->stream_errno; + } else { + i_assert(stream->parent->eof); + zlib_read_error(zstream, "unexpected EOF"); + stream->istream.stream_errno = EPIPE; + } + return -1; + } + if (size == 0) { + /* no more input */ + i_assert(!stream->istream.blocking); + return 0; + } + + zstream->zs.next_in = (void *)data; + zstream->zs.avail_in = size; + + zstream->zs.next_out = stream->w_buffer + stream->pos; + zstream->zs.avail_out = out_size; + ret = inflate(&zstream->zs, Z_SYNC_FLUSH); + + out_size -= zstream->zs.avail_out; + zstream->crc32 = crc32_data_more(zstream->crc32, + stream->w_buffer + stream->pos, + out_size); + stream->pos += out_size; + + size_t bytes_consumed = size - zstream->zs.avail_in; + i_stream_skip(stream->parent, bytes_consumed); + if (i_stream_get_data_size(stream->parent) > 0 && + (bytes_consumed > 0 || out_size > 0)) { + /* Parent stream was only partially consumed. Set the stream's + IO as pending to avoid hangs. */ + i_stream_set_input_pending(&stream->istream, TRUE); + } + + switch (ret) { + case Z_OK: + break; + case Z_NEED_DICT: + zlib_read_error(zstream, "can't read file without dict"); + stream->istream.stream_errno = EIO; + return -1; + case Z_DATA_ERROR: + zlib_read_error(zstream, "corrupted data"); + stream->istream.stream_errno = EINVAL; + return -1; + case Z_MEM_ERROR: + i_fatal_status(FATAL_OUTOFMEM, "zlib.read(%s): Out of memory", + i_stream_get_name(&stream->istream)); + case Z_STREAM_END: + zstream->eof_offset = stream->istream.v_offset + + (stream->pos - stream->skip); + stream->cached_stream_size = zstream->eof_offset; + zstream->zs.avail_in = 0; + + if (!zstream->trailer_read) { + /* try to read and verify the trailer, we might not + be called again. */ + if (i_stream_zlib_read_trailer(zstream) < 0) + return -1; + } + break; + default: + i_fatal("inflate() failed with %d", ret); + } + if (out_size == 0) { + /* read more input */ + return i_stream_zlib_read(stream); + } + return out_size; +} + +static void i_stream_zlib_init(struct zlib_istream *zstream) +{ + int ret; + + ret = inflateInit2(&zstream->zs, -15); + switch (ret) { + case Z_OK: + break; + case Z_MEM_ERROR: + i_fatal_status(FATAL_OUTOFMEM, "zlib: Out of memory"); + case Z_VERSION_ERROR: + i_fatal("Wrong zlib library version (broken compilation)"); + case Z_STREAM_ERROR: + i_fatal("zlib: Invalid parameters"); + default: + i_fatal("inflateInit() failed with %d", ret); + } + zstream->header_read = !zstream->gz; + zstream->trailer_read = !zstream->gz; +} + +static void i_stream_zlib_reset(struct zlib_istream *zstream) +{ + struct istream_private *stream = &zstream->istream; + + i_stream_seek(stream->parent, stream->parent_start_offset); + zstream->eof_offset = UOFF_T_MAX; + zstream->crc32 = 0; + + zstream->zs.next_in = NULL; + zstream->zs.avail_in = 0; + + stream->parent_expected_offset = stream->parent_start_offset; + stream->skip = stream->pos = 0; + stream->istream.v_offset = 0; + stream->high_pos = 0; + zstream->prev_size = 0; + + (void)inflateEnd(&zstream->zs); + i_stream_zlib_init(zstream); +} + +static void +i_stream_zlib_seek(struct istream_private *stream, uoff_t v_offset, bool mark) +{ + struct zlib_istream *zstream = (struct zlib_istream *) stream; + + if (i_stream_nonseekable_try_seek(stream, v_offset)) + return; + + /* have to seek backwards - reset state and retry */ + i_stream_zlib_reset(zstream); + if (!i_stream_nonseekable_try_seek(stream, v_offset)) + i_unreached(); + + if (mark) + zstream->marked = TRUE; +} + +static void i_stream_zlib_sync(struct istream_private *stream) +{ + struct zlib_istream *zstream = (struct zlib_istream *) stream; + const struct stat *st; + + if (i_stream_stat(stream->parent, FALSE, &st) == 0) { + if (memcmp(&zstream->last_parent_statbuf, + st, sizeof(*st)) == 0) { + /* a compressed file doesn't change unexpectedly, + don't clear our caches unnecessarily */ + return; + } + zstream->last_parent_statbuf = *st; + } + i_stream_zlib_reset(zstream); +} + +static struct istream * +i_stream_create_zlib(struct istream *input, bool gz) +{ + struct zlib_istream *zstream; + + zstream = i_new(struct zlib_istream, 1); + zstream->eof_offset = UOFF_T_MAX; + zstream->gz = gz; + + i_stream_zlib_init(zstream); + + zstream->istream.iostream.close = i_stream_zlib_close; + zstream->istream.max_buffer_size = input->real_stream->max_buffer_size; + zstream->istream.read = i_stream_zlib_read; + zstream->istream.seek = i_stream_zlib_seek; + zstream->istream.sync = i_stream_zlib_sync; + + zstream->istream.istream.readable_fd = FALSE; + zstream->istream.istream.blocking = input->blocking; + zstream->istream.istream.seekable = input->seekable; + + return i_stream_create(&zstream->istream, input, + i_stream_get_fd(input), 0); +} + +struct istream *i_stream_create_gz(struct istream *input) +{ + return i_stream_create_zlib(input, TRUE); +} + +struct istream *i_stream_create_deflate(struct istream *input) +{ + return i_stream_create_zlib(input, FALSE); +} +#endif |