summary refs log tree commit diff stats
path: root/src/liblzma/common/microlzma_decoder.c
blob: e473373daaae99b9d20f27a4ad23698bf2e37fe6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
///////////////////////////////////////////////////////////////////////////////
//
/// \file       microlzma_decoder.c
/// \brief      Decode MicroLZMA format
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "lzma_decoder.h"
#include "lz_decoder.h"


/// State of the MicroLZMA decoder: a wrapped LZMA1 decoder plus the
/// application-provided sizes that bound how much input may be read and,
/// when the uncompressed size isn't exact, how much output may be produced.
typedef struct {
	/// LZMA1 decoder. It is (re)initialized in microlzma_decode() after
	/// the first byte of the MicroLZMA stream has been read.
	lzma_next_coder lzma;

	/// Compressed size of the stream as given by the application.
	/// This must be exactly correct.
	///
	/// This is decremented by the number of input bytes consumed
	/// (including the first byte that holds the properties), so it is
	/// the number of compressed bytes that may still be read.
	uint64_t comp_size;

	/// Uncompressed size of the stream as given by the application.
	/// This may be less than the actual uncompressed size if
	/// uncomp_size_is_exact is false.
	///
	/// If uncomp_size_is_exact is false, this is decremented when
	/// output is produced and decoding ends when it reaches zero.
	/// If uncomp_size_is_exact is true, the value is passed to
	/// the LZMA decoder as the exact size and isn't modified by
	/// microlzma_decode().
	lzma_vli uncomp_size;

	/// LZMA dictionary size as given by the application
	uint32_t dict_size;

	/// If true, the exact uncompressed size is known. If false,
	/// uncomp_size may be smaller than the real uncompressed size;
	/// uncomp_size may never be bigger than the real uncompressed size.
	bool uncomp_size_is_exact;

	/// True once the first byte of the MicroLZMA stream
	/// has been processed, that is, the properties have been decoded,
	/// the LZMA decoder has been initialized, and the dummy first
	/// byte has been fed to it.
	bool props_decoded;
} lzma_microlzma_coder;


/// \brief      Decode MicroLZMA data using the wrapped LZMA1 decoder
///
/// On the first call that has input available, the first byte is read as
/// the bitwise-negated LZMA properties byte, the LZMA1 decoder is
/// initialized, and a dummy 0x00 byte is fed to it in place of the byte
/// that was consumed. After that the input is passed through to the
/// LZMA1 decoder, limited to the remaining compressed size and, if the
/// uncompressed size isn't exact, to the remaining uncompressed size.
///
/// \return     - LZMA_OK: More input or output space is needed. This is
///               also returned if there was no input for the first byte.
///             - LZMA_STREAM_END: Decoding finished.
///             - LZMA_OPTIONS_ERROR: The properties byte was rejected.
///             - LZMA_DATA_ERROR: The sizes given by the application
///               don't match what the LZMA decoder reported.
///             - LZMA_PROG_ERROR: The LZMA decoder didn't accept
///               the dummy first byte.
///             - Any other value is passed through from the decoder
///               initialization or from the LZMA decoder itself.
static lzma_ret
microlzma_decode(void *coder_ptr, const lzma_allocator *allocator,
		const uint8_t *restrict in, size_t *restrict in_pos,
		size_t in_size, uint8_t *restrict out,
		size_t *restrict out_pos, size_t out_size, lzma_action action)
{
	lzma_microlzma_coder *const mz = coder_ptr;

	// Positions at entry. These are needed at the end to find out how
	// much input was consumed and how much output was produced.
	const size_t in_begin = *in_pos;
	const size_t out_begin = *out_pos;

	// The LZMA decoder must never see more than comp_size bytes of
	// input. With an inexact uncomp_size it would otherwise keep reading
	// input even without output space (it can be looking for EOPM).
	const size_t in_limit = (in_size - in_begin > mz->comp_size)
			? in_begin + (size_t)(mz->comp_size)
			: in_size;

	// With an inexact uncomp_size the output space has to be capped too
	// so that the LZMA decoder won't try to decode too much.
	size_t out_limit = out_size;
	if (!mz->uncomp_size_is_exact
			&& out_size - out_begin > mz->uncomp_size)
		out_limit = out_begin + (size_t)(mz->uncomp_size);

	if (!mz->props_decoded) {
		// Without at least one input byte the properties
		// cannot be decoded yet.
		if (*in_pos >= in_limit)
			return LZMA_OK;

		lzma_options_lzma lzma_opts = {
			.dict_size = mz->dict_size,
			.preset_dict = NULL,
			.preset_dict_size = 0,
			.ext_flags = 0, // EOPM not allowed when size is known
			.ext_size_low = UINT32_MAX, // Unknown size by default
			.ext_size_high = UINT32_MAX,
		};

		if (mz->uncomp_size_is_exact)
			lzma_set_ext_size(lzma_opts, mz->uncomp_size);

		// MicroLZMA stores the properties byte as the bitwise
		// negation of the usual lc/lp/pb encoding.
		if (lzma_lzma_lclppb_decode(&lzma_opts, ~in[*in_pos]))
			return LZMA_OPTIONS_ERROR;

		++*in_pos;

		// Set up the LZMA1 decoder as a single-filter chain.
		lzma_filter_info chain[2] = {
			{
				.id = LZMA_FILTER_LZMA1EXT,
				.init = &lzma_lzma_decoder_init,
				.options = &lzma_opts,
			}, {
				.init = NULL,
			}
		};

		return_if_error(lzma_next_filter_init(&mz->lzma,
				allocator, chain));

		// The LZMA decoder expects the first byte to be 0x00.
		// That byte position held the properties, so feed
		// a dummy zero byte to the decoder instead.
		const uint8_t zero_byte = 0;
		size_t zero_byte_pos = 0;
		const lzma_ret dummy_ret = mz->lzma.code(
				mz->lzma.coder, allocator,
				&zero_byte, &zero_byte_pos, 1,
				out, out_pos, out_limit, LZMA_RUN);
		if (dummy_ret != LZMA_OK)
			return LZMA_PROG_ERROR;

		assert(zero_byte_pos == 1);
		mz->props_decoded = true;
	}

	// From here on it is ordinary LZMA decoding.
	const lzma_ret ret = mz->lzma.code(mz->lzma.coder, allocator,
			in, in_pos, in_limit,
			out, out_pos, out_limit, action);

	// Account for the input consumed during this call. This includes
	// the properties byte if it was read above.
	const size_t in_used = *in_pos - in_begin;
	assert(mz->comp_size >= in_used);
	mz->comp_size -= in_used;

	if (mz->uncomp_size_is_exact) {
		// When the decoder reports the end of the stream, all of
		// the compressed data must have been consumed.
		if (ret == LZMA_STREAM_END && mz->comp_size != 0)
			return LZMA_DATA_ERROR;

		return ret;
	}

	// Account for the output produced during this call.
	const size_t out_made = *out_pos - out_begin;
	assert(mz->uncomp_size >= out_made);
	mz->uncomp_size -= out_made;

	// The stream shouldn't have EOPM, so the LZMA decoder must not
	// report the end of the stream by itself.
	if (ret == LZMA_STREAM_END)
		return LZMA_DATA_ERROR;

	// Instead, the end is reached when the requested amount of
	// output has been produced.
	return mz->uncomp_size == 0 ? LZMA_STREAM_END : ret;
}


/// \brief      Free the MicroLZMA decoder
///
/// The wrapped LZMA decoder is ended with lzma_next_end() before
/// the memory of the MicroLZMA coder structure itself is freed.
static void
microlzma_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
{
	lzma_microlzma_coder *const mz = coder_ptr;

	// mz->lzma lives inside *mz, so it has to be ended first.
	lzma_next_end(&mz->lzma, allocator);
	lzma_free(mz, allocator);
}


/// \brief      Initialize or reinitialize the MicroLZMA decoder
///
/// The coder structure is allocated on first use and reused after that.
/// The wrapped LZMA decoder is not initialized here; microlzma_decode()
/// does that once the first byte of the stream has been read.
///
/// \return     - LZMA_OK: The decoder is ready to be used.
///             - LZMA_MEM_ERROR: Allocating the coder structure failed.
///             - LZMA_OPTIONS_ERROR: uncomp_size is bigger than
///               LZMA_VLI_MAX.
static lzma_ret
microlzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
		uint64_t comp_size,
		uint64_t uncomp_size, bool uncomp_size_is_exact,
		uint32_t dict_size)
{
	lzma_next_coder_init(&microlzma_decoder_init, next, allocator);

	lzma_microlzma_coder *mz = next->coder;

	if (mz == NULL) {
		mz = lzma_alloc(sizeof(*mz), allocator);
		if (mz == NULL)
			return LZMA_MEM_ERROR;

		// Mark the wrapped decoder as not yet initialized.
		mz->lzma = LZMA_NEXT_CODER_INIT;

		next->coder = mz;
		next->code = &microlzma_decode;
		next->end = &microlzma_decoder_end;
	}

	// uncomp_size is uint64_t in the public API but it is stored
	// as lzma_vli because that is what the internal LZ decoder API
	// uses. Values that don't fit have to be rejected.
	if (uncomp_size > LZMA_VLI_MAX)
		return LZMA_OPTIONS_ERROR;

	// Start from the beginning of a new stream.
	mz->props_decoded = false;

	mz->dict_size = dict_size;
	mz->uncomp_size_is_exact = uncomp_size_is_exact;
	mz->uncomp_size = uncomp_size;
	mz->comp_size = comp_size;

	return LZMA_OK;
}


/// \brief      Public entry point: set up strm as a MicroLZMA decoder
///
/// The arguments are passed to microlzma_decoder_init() as is.
/// When the initialization succeeds, the LZMA_RUN and LZMA_FINISH
/// actions are marked as supported and LZMA_OK is returned.
extern LZMA_API(lzma_ret)
lzma_microlzma_decoder(lzma_stream *strm, uint64_t comp_size,
		uint64_t uncomp_size, lzma_bool uncomp_size_is_exact,
		uint32_t dict_size)
{
	// NOTE(review): lzma_next_strm_init() is a macro defined elsewhere;
	// presumably it returns from this function if the initialization
	// fails. Confirm against its definition.
	lzma_next_strm_init(microlzma_decoder_init, strm, comp_size,
			uncomp_size, uncomp_size_is_exact, dict_size);

	// Only these two actions are enabled for this decoder.
	strm->internal->supported_actions[LZMA_FINISH] = true;
	strm->internal->supported_actions[LZMA_RUN] = true;

	return LZMA_OK;
}