// Origin: tests/test_microlzma.c (blob c1d99d77fc3dc311a4cf3f2574f82b883bde6238)
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       test_microlzma.c
/// \brief      Tests MicroLZMA encoding and decoding
//
//  Author:     Jia Tan
//
///////////////////////////////////////////////////////////////////////////////

#include "tests.h"

// Size in bytes of the on-stack output buffers used by the tests below.
#define BUFFER_SIZE 1024


#ifdef HAVE_ENCODER_LZMA1

// Size in bytes of the MicroLZMA encoding of "Hello\nWorld\n".
// test_encode_small_out() offers one byte less than this as output space
// and then checks that not all of the input was consumed.
#define ENCODED_OUTPUT_SIZE 17

// The text "Hello\nWorld\n" spelled out as raw byte values. This is the
// input for the encoder tests.
static const uint8_t hello_world[] = {
	0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, // "Hello\n"
	0x57, 0x6F, 0x72, 0x6C, 0x64, 0x0A, // "World\n"
};

// CRC32 of the MicroLZMA stream produced from "Hello\nWorld\n" when
// encoding with the LZMA_PRESET_DEFAULT settings of liblzma 5.6.0.
//
// The value is a regression guard: it trusts that liblzma 5.6.0 encodes
// MicroLZMA correctly (its output for this input is known to decode
// successfully), so any later change in the encoder output shows up as
// a CRC32 mismatch.
static const uint32_t hello_world_encoded_crc = UINT32_C(0x3CDE40A8);


// Splits an LZMA properties byte into options->lc, options->lp, and
// options->pb. The logic follows the function of the same name in
// lzma_decoder.c. The tests need it to verify that the first byte of
// a MicroLZMA stream holds the bitwise negation of the LZMA properties.
//
// Returns true if the byte is out of range or if lc + lp exceeds
// LZMA_LCLP_MAX, and false otherwise. When the byte is out of range,
// *options is left untouched.
static bool
lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
{
	// The byte is laid out as (pb * 5 + lp) * 9 + lc. The limit below
	// is that expression with pb = 4, lp = 4, and lc = 8.
	const uint8_t props_max = (4 * 5 + 4) * 9 + 8;

	if (byte > props_max)
		return true;

	// Peel off pb first; what remains is lp * 9 + lc.
	const uint32_t lclp = byte % (9 * 5);

	options->pb = byte / (9 * 5);
	options->lp = lclp / 9;
	options->lc = lclp % 9;

	return options->lc + options->lp > LZMA_LCLP_MAX;
}


///////////////////
// Encoder tests //
///////////////////

// Checks that lzma_microlzma_encoder() rejects bad arguments: a NULL
// stream, invalid lc/lp/pb combinations, and dictionary sizes outside
// the allowed range. None of these options are specific to MicroLZMA;
// the point is to ensure that they really are validated before the
// encoder internals get initialized.
static void
test_encode_options(void)
{
	lzma_options_lzma opt_lzma;
	lzma_stream strm = LZMA_STREAM_INIT;

	// Begin with the default preset as the baseline.
	assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));

	// A NULL stream must be reported as a programming error.
	assert_lzma_ret(lzma_microlzma_encoder(NULL, &opt_lzma),
			LZMA_PROG_ERROR);

	// Each row is a { lc, lp, pb } combination that must be rejected.
	static const uint32_t bad_lclppb[][3] = {
		{ 5, 0, 2 }, // lc invalid
		{ 0, 5, 2 }, // lp invalid
		{ 3, 2, 2 }, // lc + lp invalid
		{ 3, 0, 5 }, // pb invalid
	};
	const size_t bad_count = sizeof(bad_lclppb) / sizeof(bad_lclppb[0]);

	for (size_t i = 0; i < bad_count; ++i) {
		opt_lzma.lc = bad_lclppb[i][0];
		opt_lzma.lp = bad_lclppb[i][1];
		opt_lzma.pb = bad_lclppb[i][2];
		assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
				LZMA_OPTIONS_ERROR);
	}

	// Clear lc/lp/pb so that they do not interfere with
	// the dictionary size checks that follow.
	opt_lzma.lc = 0;
	opt_lzma.lp = 0;
	opt_lzma.pb = 0;

	// One byte below the smallest allowed dictionary size.
	opt_lzma.dict_size = LZMA_DICT_SIZE_MIN - 1;
	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
			LZMA_OPTIONS_ERROR);

	// One byte above the encoder's maximum dictionary size, which
	// lzma12.h describes as 1.5 GiB (3 * 2^29 bytes).
	opt_lzma.dict_size = (UINT32_C(3) << 29) + 1;
	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
			LZMA_OPTIONS_ERROR);

	lzma_end(&strm);
}


// Encodes "Hello\nWorld\n" with the default preset in a single
// lzma_code() call and verifies three things: all input was consumed,
// the first output byte is the negated LZMA properties byte, and the
// CRC32 of the output matches the known-good value.
static void
test_encode_basic(void)
{
	uint8_t output[BUFFER_SIZE];
	lzma_options_lzma opt_lzma;
	lzma_stream strm = LZMA_STREAM_INIT;

	// lzma_lzma_preset() reports success by returning false, which is
	// the opposite of what one might expect.
	assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));

	// Set up the encoder with the default options.
	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK);

	strm.next_out = output;
	strm.avail_out = sizeof(output);
	strm.next_in = hello_world;
	strm.avail_in = sizeof(hello_world);

	// A single lzma_code() call has to encode everything.
	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END);

	// All of the input must have been used.
	assert_uint_eq(strm.total_in, sizeof(hello_world));

	// A regular raw LZMA stream always starts with the byte 0x00.
	// MicroLZMA replaces that byte with the bitwise negation of
	// the LZMA properties, so it must not be 0x00 here.
	assert_uint(output[0], !=, 0x00);

	// Undo the negation and decode the properties byte.
	const uint8_t props = (uint8_t)~output[0];

	lzma_options_lzma decoded;
	assert_false(lzma_lzma_lclppb_decode(&decoded, props));

	// The decoded properties must be the ones the encoder was given.
	assert_uint_eq(opt_lzma.lc, decoded.lc);
	assert_uint_eq(opt_lzma.lp, decoded.lp);
	assert_uint_eq(opt_lzma.pb, decoded.pb);

	// Compare the CRC32 of the encoded data against the expected value.
	const uint32_t output_crc = lzma_crc32(output, strm.total_out, 0);

	assert_uint_eq(output_crc, hello_world_encoded_crc);

	lzma_end(&strm);
}


// Tests encoding into output buffers that are too small.
//
// First strm.avail_out is so small (< 6) that it cannot hold the header
// plus one encoded byte; lzma_code() must fail with LZMA_PROG_ERROR.
// Then strm.avail_out is at least 6 but one byte short of the full
// encoded size; encoding must finish with LZMA_STREAM_END while having
// consumed only a part of the input.
static void
test_encode_small_out(void)
{
	lzma_stream strm = LZMA_STREAM_INIT;
	lzma_options_lzma opt_lzma;

	assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));

	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK);

	uint8_t output[BUFFER_SIZE];

	strm.next_in = hello_world;
	strm.avail_in = sizeof(hello_world);
	strm.next_out = output;
	strm.avail_out = 5;

	// LZMA_PROG_ERROR is expected when strm.avail_out < 6
	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_PROG_ERROR);

	// The encoder must be reset because coders cannot be used again
	// after returning LZMA_PROG_ERROR.
	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK);

	// Set up every buffer field again instead of relying on the failed
	// lzma_code() call having left next_in, avail_in, and next_out
	// untouched. avail_out is now >= 6 but not enough to hold all of
	// the compressed data.
	strm.next_in = hello_world;
	strm.avail_in = sizeof(hello_world);
	strm.next_out = output;
	strm.avail_out = ENCODED_OUTPUT_SIZE - 1;

	// Encoding should not return an error now, but since the output
	// space was limited, only a part of the input can have been encoded.
	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END);
	assert_uint(strm.total_in, <, sizeof(hello_world));

	lzma_end(&strm);
}


// The MicroLZMA encoder supports no action other than LZMA_FINISH.
// Calling lzma_code() with any of the other actions must fail with
// LZMA_PROG_ERROR.
static void
test_encode_actions(void)
{
	static const lzma_action unsupported[] = {
		LZMA_RUN,
		LZMA_SYNC_FLUSH,
		LZMA_FULL_FLUSH,
		LZMA_FULL_BARRIER,
	};

	uint8_t output[BUFFER_SIZE];
	lzma_options_lzma opt_lzma;
	lzma_stream strm = LZMA_STREAM_INIT;

	assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));

	for (size_t i = 0; i < ARRAY_SIZE(unsupported); ++i) {
		const lzma_action action = unsupported[i];

		// The encoder is initialized again on every round because
		// the previous round ended with LZMA_PROG_ERROR.
		assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
				LZMA_OK);

		strm.next_out = output;
		strm.avail_out = sizeof(output);
		strm.next_in = hello_world;
		strm.avail_in = sizeof(hello_world);

		assert_lzma_ret(lzma_code(&strm, action), LZMA_PROG_ERROR);
	}

	lzma_end(&strm);
}
#endif // HAVE_ENCODER_LZMA1


///////////////////
// Decoder tests //
///////////////////

#if defined(HAVE_DECODER_LZMA1) && defined(HAVE_ENCODER_LZMA1)

// The text "Goodbye World!" spelled out as raw byte values. This is the
// uncompressed data behind the decoder tests.
static const uint8_t goodbye_world[] = {
	0x47, 0x6F, 0x6F, 0x64, 0x62, 0x79, 0x65, // "Goodbye"
	0x20,                                     // " "
	0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21,       // "World!"
};

// MicroLZMA encoding of goodbye_world and the size of that encoding in
// bytes. main() sets both with basic_microlzma_encode() before it runs
// the decoder tests, which then use them as their compressed input.
static uint8_t *goodbye_world_encoded = NULL;
static size_t goodbye_world_encoded_size = 0;


// Helper function that encodes in_size bytes from input as MicroLZMA
// using the default preset.
//
// *compressed is set to an output buffer allocated with tuktest_malloc().
// The buffer is twice the size of the input, so it is larger than the
// encoded data whenever the input does not inflate much.
//
// Returns the size of the encoded data in bytes. If any step fails, the
// failure is reported with tuktest_error() and 0 is returned.
static size_t
basic_microlzma_encode(const uint8_t *input, size_t in_size,
		uint8_t **compressed)
{
	lzma_stream strm = LZMA_STREAM_INIT;
	lzma_options_lzma opt_lzma;

	// Lazy way to set the output size since the input should never
	// inflate by much in these simple test cases. This is tested to
	// be large enough after encoding to fit the entire input, so if
	// this assumption does not hold then this will fail.
	const size_t out_size = in_size << 1;

	*compressed = tuktest_malloc(out_size);

	// Always encode with the default options for simplicity.
	// lzma_lzma_preset() returns true on failure.
	if (lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT))
		goto decoder_setup_error;

	if (lzma_microlzma_encoder(&strm, &opt_lzma) != LZMA_OK)
		goto decoder_setup_error;

	strm.next_in = input;
	strm.avail_in = in_size;
	strm.next_out = *compressed;
	strm.avail_out = out_size;

	if (lzma_code(&strm, LZMA_FINISH) != LZMA_STREAM_END)
		goto decoder_setup_error;

	// Check that the entire input was consumed and that it fit into
	// the output buffer.
	if (strm.total_in != in_size)
		goto decoder_setup_error;

	lzma_end(&strm);

	// lzma_end() doesn't touch other members of lzma_stream than
	// lzma_stream.internal so using strm.total_out here is fine.
	return strm.total_out;

decoder_setup_error:
	// Free the encoder state first so that the failure paths do not
	// leak it. This is fine even when the encoder was never set up.
	lzma_end(&strm);

	tuktest_error("Failed to initialize decoder tests");
	return 0;
}


// Checks that lzma_microlzma_decoder() rejects a NULL stream with
// LZMA_PROG_ERROR and an uncompressed size above LZMA_VLI_MAX with
// LZMA_OPTIONS_ERROR.
static void
test_decode_options(void)
{
	// NULL stream
	assert_lzma_ret(lzma_microlzma_decoder(NULL, BUFFER_SIZE,
			sizeof(hello_world), true,
			LZMA_DICT_SIZE_DEFAULT), LZMA_PROG_ERROR);

	// Uncompressed size larger than max
	lzma_stream strm = LZMA_STREAM_INIT;
	assert_lzma_ret(lzma_microlzma_decoder(&strm, BUFFER_SIZE,
			LZMA_VLI_MAX + 1, true, LZMA_DICT_SIZE_DEFAULT),
			LZMA_OPTIONS_ERROR);

	// Release the stream like the other tests in this file do, so
	// that this test cannot leak if a check that initializes the
	// decoder successfully is ever added here.
	lzma_end(&strm);
}


// When uncomp_size is correct, decoding has to succeed whether or not
// uncomp_size_is_exact is set. The round with uncomp_size_is_exact
// unset also uses the uncompressed size as the dictionary size.
static void
test_decode_uncomp_size_is_exact(void)
{
	// The decoder settings that differ between the two rounds.
	const struct {
		bool is_exact;
		uint32_t dict_size;
	} rounds[] = {
		{ true, LZMA_DICT_SIZE_DEFAULT },
		{ false, sizeof(goodbye_world) },
	};

	uint8_t output[BUFFER_SIZE];
	lzma_stream strm = LZMA_STREAM_INIT;

	for (size_t i = 0; i < sizeof(rounds) / sizeof(rounds[0]); ++i) {
		// The second round reinitializes the same stream.
		assert_lzma_ret(lzma_microlzma_decoder(&strm,
				goodbye_world_encoded_size,
				sizeof(goodbye_world), rounds[i].is_exact,
				rounds[i].dict_size), LZMA_OK);

		strm.next_out = output;
		strm.avail_out = sizeof(output);
		strm.next_in = goodbye_world_encoded;
		strm.avail_in = goodbye_world_encoded_size;

		// One call must decode the whole stream...
		assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END);
		assert_uint_eq(strm.total_in, goodbye_world_encoded_size);

		// ...and reproduce the original data exactly.
		assert_uint_eq(strm.total_out, sizeof(goodbye_world));
		assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
	}

	lzma_end(&strm);
}


// This tests decoding when the MicroLZMA decoder is initialized with
// an incorrect uncompressed size. Three cases are covered:
//   - one byte too large with uncomp_size_is_exact unset
//   - one byte too large with uncomp_size_is_exact set
//   - one byte too small with uncomp_size_is_exact set
static void
test_decode_uncomp_size_wrong(void)
{
	lzma_stream strm = LZMA_STREAM_INIT;
	assert_lzma_ret(lzma_microlzma_decoder(&strm,
			goodbye_world_encoded_size,
			sizeof(goodbye_world) + 1, false,
			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);

	uint8_t output[BUFFER_SIZE];

	strm.next_in = goodbye_world_encoded;
	strm.avail_in = goodbye_world_encoded_size;
	strm.next_out = output;
	strm.avail_out = sizeof(output);

	// LZMA_OK should be returned because the uncompressed size given
	// was larger than what the input really decodes to. The decoder
	// is expecting more input to possibly fill the uncompressed size
	// that was set.
	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK);

	// All of the real data must have been decoded nevertheless.
	assert_uint_eq(strm.total_out, sizeof(goodbye_world));

	assert_array_eq(goodbye_world, output, sizeof(goodbye_world));

	// Next, test with uncomp_size_is_exact set.
	assert_lzma_ret(lzma_microlzma_decoder(&strm,
			goodbye_world_encoded_size,
			sizeof(goodbye_world) + 1, true,
			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);

	strm.next_in = goodbye_world_encoded;
	strm.avail_in = goodbye_world_encoded_size;
	strm.next_out = output;
	strm.avail_out = sizeof(output);

	// No error detected, even though all input was consumed and there
	// is more room in the output buffer.
	//
	// FIXME? LZMA_FINISH tells that no more input is coming and
	// the MicroLZMA decoder knows the exact compressed size from
	// the initialization as well. So should it return LZMA_DATA_ERROR
	// on the first call instead of relying on the generic lzma_code()
	// logic to eventually get LZMA_BUF_ERROR?
	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK);
	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK);
	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_BUF_ERROR);

	assert_uint_eq(strm.total_out, sizeof(goodbye_world));
	assert_array_eq(goodbye_world, output, sizeof(goodbye_world));

	// Reset stream with uncomp_size smaller than the real
	// uncompressed size. The size is derived from goodbye_world,
	// the data that is actually being decoded, so that it stays
	// "one byte too small" no matter how long the other test
	// inputs in this file are.
	assert_lzma_ret(lzma_microlzma_decoder(&strm,
			goodbye_world_encoded_size,
			sizeof(goodbye_world) - 1, true,
			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);

	strm.next_in = goodbye_world_encoded;
	strm.avail_in = goodbye_world_encoded_size;
	strm.next_out = output;
	strm.avail_out = sizeof(output);

	// This case actually results in an error since it decodes the full
	// uncompressed size but the range coder is not in the proper state
	// for the stream to end.
	assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR);

	lzma_end(&strm);
}


// Tests decoding when the MicroLZMA decoder is initialized with a
// compressed size that is one byte larger than the real one, first
// with uncomp_size_is_exact set and then with it unset.
static void
test_decode_comp_size_wrong(void)
{
	uint8_t output[BUFFER_SIZE];
	lzma_stream strm = LZMA_STREAM_INIT;

	// Claiming one extra byte is safe because more space than needed
	// was allocated for goodbye_world_encoded. That extra space isn't
	// initialized, but it shouldn't be read either, so Valgrind has
	// to remain happy with this code.
	const size_t wrong_comp_size = goodbye_world_encoded_size + 1;

	// Round 1: uncomp_size_is_exact is set.
	assert_lzma_ret(lzma_microlzma_decoder(&strm, wrong_comp_size,
			sizeof(goodbye_world), true,
			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);

	strm.next_out = output;
	strm.avail_out = sizeof(output);
	strm.next_in = goodbye_world_encoded;
	strm.avail_in = goodbye_world_encoded_size;

	// With uncomp_size_is_exact set, a compressed size that isn't
	// correct has to result in LZMA_DATA_ERROR.
	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_DATA_ERROR);

	// Round 2: uncomp_size_is_exact is not set.
	assert_lzma_ret(lzma_microlzma_decoder(&strm, wrong_comp_size,
			sizeof(goodbye_world), false,
			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);

	strm.next_out = output;
	strm.avail_out = sizeof(output);
	strm.next_in = goodbye_world_encoded;
	strm.avail_in = goodbye_world_encoded_size;

	// Without uncomp_size_is_exact the wrong compressed size goes
	// unnoticed as long as all of the expected output gets decoded.
	// The decoder assumes that the real uncompressed size might be
	// bigger than the specified value, in which case more input
	// might be needed as well.
	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END);

	lzma_end(&strm);
}


// Tests decoding of streams whose first byte (the negated LZMA
// properties) has been tampered with: first with properties that are
// invalid, then with properties that are valid but differ from the
// ones the data was encoded with.
static void
test_decode_bad_lzma_properties(void)
{
	// Properties bytes before negation.
	const uint8_t invalid_props = 0x6F; // lc=3, lp=2, pb=2
	const uint8_t wrong_props = 0x66;   // lc=3, lp=1, pb=2

	// Work on a private copy so that the shared encoded data
	// stays intact for the other tests.
	uint8_t *tampered = tuktest_malloc(goodbye_world_encoded_size);
	memcpy(tampered, goodbye_world_encoded, goodbye_world_encoded_size);

	uint8_t output[BUFFER_SIZE];
	lzma_stream strm = LZMA_STREAM_INIT;

	// Invalid properties must be rejected with LZMA_OPTIONS_ERROR.
	tampered[0] = (uint8_t)~invalid_props;

	assert_lzma_ret(lzma_microlzma_decoder(&strm,
			goodbye_world_encoded_size,
			sizeof(goodbye_world), false,
			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);

	strm.next_out = output;
	strm.avail_out = sizeof(output);
	strm.next_in = tampered;
	strm.avail_in = goodbye_world_encoded_size;

	assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_OPTIONS_ERROR);

	// Valid but incorrect properties must result in LZMA_DATA_ERROR.
	tampered[0] = (uint8_t)~wrong_props;

	assert_lzma_ret(lzma_microlzma_decoder(&strm,
			goodbye_world_encoded_size,
			sizeof(goodbye_world), true,
			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);

	strm.next_out = output;
	strm.avail_out = sizeof(output);
	strm.next_in = tampered;
	strm.avail_in = goodbye_world_encoded_size;

	assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR);

	lzma_end(&strm);
}
#endif


// Entry point of the test program. The encoder tests run whenever the
// LZMA1 encoder is available. The decoder tests additionally need the
// LZMA1 decoder, and they use the encoder to create their input.
extern int
main(int argc, char **argv)
{
	tuktest_start(argc, argv);

#ifdef HAVE_ENCODER_LZMA1
	tuktest_run(test_encode_options);
	tuktest_run(test_encode_basic);
	tuktest_run(test_encode_small_out);
	tuktest_run(test_encode_actions);

#	ifdef HAVE_DECODER_LZMA1
	// The decoder tests depend on basic encoder functionality:
	// their compressed input is created here, before any of
	// them runs.
	goodbye_world_encoded_size = basic_microlzma_encode(goodbye_world,
			sizeof(goodbye_world), &goodbye_world_encoded);

	tuktest_run(test_decode_options);
	tuktest_run(test_decode_uncomp_size_is_exact);
	tuktest_run(test_decode_uncomp_size_wrong);
	tuktest_run(test_decode_comp_size_wrong);
	tuktest_run(test_decode_bad_lzma_properties);
#	endif

	return tuktest_end();
#else
	// Without the encoder there is nothing to test.
	// NOTE(review): presumably tuktest_early_skip() does not return,
	// as no return statement follows it -- confirm in tuktest.h.
	tuktest_early_skip("LZMA1 encoder disabled");
#endif
}