summaryrefslogtreecommitdiffstats
path: root/tests/test_microlzma.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--tests/test_microlzma.c568
1 files changed, 568 insertions, 0 deletions
diff --git a/tests/test_microlzma.c b/tests/test_microlzma.c
new file mode 100644
index 0000000..c1d99d7
--- /dev/null
+++ b/tests/test_microlzma.c
@@ -0,0 +1,568 @@
+// SPDX-License-Identifier: 0BSD
+
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file test_microlzma.c
+/// \brief Tests MicroLZMA encoding and decoding
+//
+// Author: Jia Tan
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "tests.h"
+
+#define BUFFER_SIZE 1024 // Size in bytes of the on-stack output buffers used by the tests.
+
+
+#ifdef HAVE_ENCODER_LZMA1
+
+// MicroLZMA encoded "Hello\nWorld\n" output size in bytes. test_encode_small_out() offers one byte less than this as output space.
+#define ENCODED_OUTPUT_SIZE 17
+
+// Byte array of "Hello\nWorld\n" (12 bytes, no terminating NUL). This is used for various encoder tests.
+static const uint8_t hello_world[] = { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x0A, // "Hello\n"
+ 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x0A }; // "World\n"
+
+// This is the CRC32 value of the MicroLZMA encoding of "Hello\nWorld\n".
+// The settings used were based on LZMA_PRESET_DEFAULT as of liblzma 5.6.0.
+// This assumes MicroLZMA is correct in liblzma 5.6.0, which is safe
+// considering the encoded "Hello\nWorld\n" can successfully be decoded at
+// this time. This is to test for regressions that cause MicroLZMA output
+// to change. test_encode_basic() compares against it.
+static const uint32_t hello_world_encoded_crc = 0x3CDE40A8;
+
+
+// Splits an LZMA properties byte into its lc/lp/pb fields. The logic is
+// taken from lzma_decoder.c; the tests need it to check that the first
+// byte of a MicroLZMA stream holds the bitwise-negation of the LZMA
+// properties.
+//
+// Returns false on success and true when the byte does not describe a
+// usable lc/lp/pb combination. If the byte is out of range, *options is
+// left unmodified.
+static bool
+lzma_lzma_lclppb_decode(lzma_options_lzma *options, uint8_t byte)
+{
+	// The byte is laid out as (pb * 5 + lp) * 9 + lc, so the largest
+	// representable value corresponds to pb = 4, lp = 4, lc = 8.
+	const uint32_t props_max = (4 * 5 + 4) * 9 + 8;
+	if (byte > props_max)
+		return true;
+
+	// pb is the quotient by 9 * 5; lp and lc come from the remainder.
+	const uint32_t lclp = (uint32_t)byte % (9 * 5);
+
+	options->pb = (uint32_t)byte / (9 * 5);
+	options->lp = lclp / 9;
+	options->lc = lclp % 9;
+
+	// The sum of lc and lp has a limit of its own.
+	return options->lc + options->lp > LZMA_LCLP_MAX;
+}
+
+
+///////////////////
+// Encoder tests //
+///////////////////
+
+// Verifies that lzma_microlzma_encoder() rejects invalid arguments. None of
+// the options tested here are unique to MicroLZMA in any way; it's mostly
+// ensuring that the options are actually being checked when the encoder
+// is initialized.
+static void
+test_encode_options(void)
+{
+	lzma_options_lzma opt_lzma;
+	lzma_stream strm = LZMA_STREAM_INIT;
+
+	// Begin with the default preset; individual fields are then
+	// overridden with invalid values one group at a time.
+	assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));
+
+	// A NULL stream pointer must be reported as a programming error.
+	assert_lzma_ret(lzma_microlzma_encoder(NULL, &opt_lzma),
+			LZMA_PROG_ERROR);
+
+	// lc invalid: lc/lp/pb = 5/0/2
+	opt_lzma.pb = 2;
+	opt_lzma.lp = 0;
+	opt_lzma.lc = 5;
+	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
+			LZMA_OPTIONS_ERROR);
+
+	// lp invalid: lc/lp/pb = 0/5/2
+	opt_lzma.pb = 2;
+	opt_lzma.lp = 5;
+	opt_lzma.lc = 0;
+	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
+			LZMA_OPTIONS_ERROR);
+
+	// lc + lp invalid: lc/lp/pb = 3/2/2
+	opt_lzma.pb = 2;
+	opt_lzma.lp = 2;
+	opt_lzma.lc = 3;
+	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
+			LZMA_OPTIONS_ERROR);
+
+	// pb invalid: lc/lp/pb = 3/0/5
+	opt_lzma.pb = 5;
+	opt_lzma.lp = 0;
+	opt_lzma.lc = 3;
+	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
+			LZMA_OPTIONS_ERROR);
+
+	// Clear lc, lp, and pb so that they cannot be the reason for
+	// the failures expected from the dictionary size checks below.
+	opt_lzma.lc = 0;
+	opt_lzma.lp = 0;
+	opt_lzma.pb = 0;
+
+	// One byte below the smallest allowed dictionary size.
+	opt_lzma.dict_size = LZMA_DICT_SIZE_MIN - 1;
+	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
+			LZMA_OPTIONS_ERROR);
+
+	// One byte above 1.5 GiB (3 << 29), which lzma12.h describes as
+	// the maximum dictionary size for the encoder.
+	opt_lzma.dict_size = (UINT32_C(3) << 29) + 1;
+	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
+			LZMA_OPTIONS_ERROR);
+
+	lzma_end(&strm);
+}
+
+
+// Encodes "Hello\nWorld\n" with the default preset and checks the result:
+// all input is consumed, the first output byte carries the negated LZMA
+// properties, and the CRC32 of the output matches the recorded value.
+static void
+test_encode_basic(void)
+{
+	uint8_t output[BUFFER_SIZE];
+	lzma_options_lzma opt_lzma;
+	lzma_stream strm = LZMA_STREAM_INIT;
+
+	// Note that lzma_lzma_preset() returns false on success, which is
+	// the inverse of what one might expect.
+	assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));
+
+	// Set up the encoder with the unmodified default options.
+	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK);
+
+	strm.next_out = output;
+	strm.avail_out = sizeof(output);
+	strm.next_in = hello_world;
+	strm.avail_in = sizeof(hello_world);
+
+	// A single lzma_code() call has to encode everything.
+	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END);
+
+	// All of the input must have been consumed.
+	assert_uint_eq(strm.total_in, sizeof(hello_world));
+
+	// A regular raw LZMA stream always begins with the byte 0x00.
+	// In MicroLZMA the first byte is replaced by the bitwise-negation
+	// of the LZMA properties, so it must not be 0x00 here.
+	assert_uint(output[0], !=, 0x00);
+
+	// Undo the negation and decode the properties; they must match
+	// the options that were given to the encoder.
+	const uint8_t props = ~output[0];
+	lzma_options_lzma test_options;
+
+	assert_false(lzma_lzma_lclppb_decode(&test_options, props));
+
+	assert_uint_eq(opt_lzma.lc, test_options.lc);
+	assert_uint_eq(opt_lzma.lp, test_options.lp);
+	assert_uint_eq(opt_lzma.pb, test_options.pb);
+
+	// Compare the CRC32 of the whole encoded output against the
+	// recorded reference value to catch changes in encoder output.
+	const uint32_t check_val = lzma_crc32(output, strm.total_out, 0);
+
+	assert_uint_eq(check_val, hello_world_encoded_crc);
+
+	lzma_end(&strm);
+}
+
+
+// This tests the behavior when strm.avail_out is too small. First it is so
+// small that it cannot hold the header plus 1 encoded byte (< 6), then it
+// is big enough for encoding to start but one byte too small to hold the
+// complete encoded output.
+static void
+test_encode_small_out(void)
+{
+	lzma_stream strm = LZMA_STREAM_INIT;
+	lzma_options_lzma opt_lzma;
+
+	assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));
+
+	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK);
+
+	uint8_t output[BUFFER_SIZE];
+
+	strm.next_in = hello_world;
+	strm.avail_in = sizeof(hello_world);
+	strm.next_out = output;
+	strm.avail_out = 5;
+
+	// LZMA_PROG_ERROR is expected when strm.avail_out < 6
+	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_PROG_ERROR);
+
+	// The encoder must be reset because coders cannot be used again
+	// after returning LZMA_PROG_ERROR.
+	assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma), LZMA_OK);
+
+	// Set up every buffer field again instead of depending on the
+	// failed lzma_code() call having left them untouched. This time
+	// strm.avail_out is >= 6 but not enough to hold all of the
+	// compressed data.
+	strm.next_in = hello_world;
+	strm.avail_in = sizeof(hello_world);
+	strm.next_out = output;
+	strm.avail_out = ENCODED_OUTPUT_SIZE - 1;
+
+	// Encoding should not return an error now. The stream still ends,
+	// but only a part of the input can have been consumed.
+	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END);
+	assert_uint(strm.total_in, <, sizeof(hello_world));
+
+	lzma_end(&strm);
+}
+
+
+// The MicroLZMA encoder supports only LZMA_FINISH. This checks that
+// lzma_code() returns LZMA_PROG_ERROR for every other action.
+static void
+test_encode_actions(void)
+{
+	// All actions other than LZMA_FINISH that are tested.
+	static const lzma_action actions[] = {
+		LZMA_RUN,
+		LZMA_SYNC_FLUSH,
+		LZMA_FULL_FLUSH,
+		LZMA_FULL_BARRIER,
+	};
+
+	uint8_t output[BUFFER_SIZE];
+	lzma_options_lzma opt_lzma;
+	lzma_stream strm = LZMA_STREAM_INIT;
+
+	assert_false(lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT));
+
+	for (size_t i = 0; i < ARRAY_SIZE(actions); i++) {
+		// Every action gets a freshly initialized encoder.
+		assert_lzma_ret(lzma_microlzma_encoder(&strm, &opt_lzma),
+				LZMA_OK);
+
+		strm.next_out = output;
+		strm.avail_out = sizeof(output);
+		strm.next_in = hello_world;
+		strm.avail_in = sizeof(hello_world);
+
+		assert_lzma_ret(lzma_code(&strm, actions[i]),
+				LZMA_PROG_ERROR);
+	}
+
+	lzma_end(&strm);
+}
+#endif // HAVE_ENCODER_LZMA1
+
+
+///////////////////
+// Decoder tests //
+///////////////////
+
+#if defined(HAVE_DECODER_LZMA1) && defined(HAVE_ENCODER_LZMA1)
+
+// Byte array of "Goodbye World!" (14 bytes, no terminating NUL). This is used for various decoder tests.
+static const uint8_t goodbye_world[] = { 0x47, 0x6F, 0x6F, 0x64, 0x62,
+ 0x79, 0x65, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64, 0x21 };
+
+static uint8_t *goodbye_world_encoded = NULL; // MicroLZMA encoding of goodbye_world; set in main() via basic_microlzma_encode().
+static size_t goodbye_world_encoded_size = 0; // Size of that encoding in bytes; set in main() before the decoder tests run.
+
+
+// Helper function to encode data with the MicroLZMA encoder using the
+// default preset.
+//
+// input/in_size describe the data to encode. *compressed is set to a buffer
+// of in_size * 2 bytes obtained from tuktest_malloc() that receives the
+// encoded data; nothing in this file frees it, so releasing it is
+// presumably left to the test framework (TODO confirm).
+//
+// Returns the compressed size in bytes. If anything goes wrong, the failure
+// is reported with tuktest_error() and, should that call return, 0 is
+// returned.
+static size_t
+basic_microlzma_encode(const uint8_t *input, size_t in_size,
+		uint8_t **compressed)
+{
+	lzma_stream strm = LZMA_STREAM_INIT;
+	lzma_options_lzma opt_lzma;
+
+	// Lazy way to set the output size since the input should never
+	// inflate by much in these simple test cases. This is tested to
+	// be large enough after encoding to fit the entire input, so if
+	// this assumption does not hold then this will fail.
+	const size_t out_size = in_size << 1;
+
+	*compressed = tuktest_malloc(out_size);
+
+	// Always encode with the default options for simplicity.
+	if (lzma_lzma_preset(&opt_lzma, LZMA_PRESET_DEFAULT))
+		goto decoder_setup_error;
+
+	if (lzma_microlzma_encoder(&strm, &opt_lzma) != LZMA_OK)
+		goto decoder_setup_error;
+
+	strm.next_in = input;
+	strm.avail_in = in_size;
+	strm.next_out = *compressed;
+	strm.avail_out = out_size;
+
+	if (lzma_code(&strm, LZMA_FINISH) != LZMA_STREAM_END)
+		goto decoder_setup_error;
+
+	// Check that the entire input was consumed and that it fit into
+	// the output buffer.
+	if (strm.total_in != in_size)
+		goto decoder_setup_error;
+
+	lzma_end(&strm);
+
+	// lzma_end() doesn't touch other members of lzma_stream than
+	// lzma_stream.internal so using strm.total_out here is fine.
+	return strm.total_out;
+
+decoder_setup_error:
+	// Release the encoder before reporting the failure so that an
+	// initialized stream isn't left behind on the error path. This is
+	// fine even when the encoder was never initialized because strm
+	// started out as LZMA_STREAM_INIT.
+	lzma_end(&strm);
+
+	tuktest_error("Failed to initialize decoder tests");
+	return 0;
+}
+
+
+// Verifies that lzma_microlzma_decoder() rejects a NULL stream and an
+// uncompressed size that is bigger than LZMA_VLI_MAX.
+static void
+test_decode_options(void)
+{
+	lzma_stream strm = LZMA_STREAM_INIT;
+
+	// Passing NULL as the stream is a programming error.
+	assert_lzma_ret(lzma_microlzma_decoder(NULL, BUFFER_SIZE,
+			sizeof(hello_world), true,
+			LZMA_DICT_SIZE_DEFAULT), LZMA_PROG_ERROR);
+
+	// An uncompressed size one past the maximum must be refused.
+	assert_lzma_ret(lzma_microlzma_decoder(&strm, BUFFER_SIZE,
+			LZMA_VLI_MAX + 1, true, LZMA_DICT_SIZE_DEFAULT),
+			LZMA_OPTIONS_ERROR);
+}
+
+
+// Checks that decoding works when uncomp_size is correct, both with
+// uncomp_size_is_exact set to true and with it set to false.
+static void
+test_decode_uncomp_size_is_exact(void)
+{
+	uint8_t output[BUFFER_SIZE];
+	lzma_stream strm = LZMA_STREAM_INIT;
+
+	// First round: uncomp_size_is_exact = true
+	assert_lzma_ret(lzma_microlzma_decoder(&strm,
+			goodbye_world_encoded_size,
+			sizeof(goodbye_world), true,
+			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
+
+	strm.next_out = output;
+	strm.avail_out = sizeof(output);
+	strm.next_in = goodbye_world_encoded;
+	strm.avail_in = goodbye_world_encoded_size;
+
+	// The whole input must be consumed and the output must be
+	// identical to the original data.
+	assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END);
+	assert_uint_eq(strm.total_in, goodbye_world_encoded_size);
+
+	assert_uint_eq(strm.total_out, sizeof(goodbye_world));
+	assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
+
+	// Second round: uncomp_size_is_exact = false while uncomp_size
+	// is still correct. The uncompressed size is additionally used
+	// as the dictionary size here.
+	assert_lzma_ret(lzma_microlzma_decoder(&strm,
+			goodbye_world_encoded_size,
+			sizeof(goodbye_world), false,
+			sizeof(goodbye_world)), LZMA_OK);
+
+	strm.next_out = output;
+	strm.avail_out = sizeof(output);
+	strm.next_in = goodbye_world_encoded;
+	strm.avail_in = goodbye_world_encoded_size;
+
+	assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_STREAM_END);
+	assert_uint_eq(strm.total_in, goodbye_world_encoded_size);
+
+	assert_uint_eq(strm.total_out, sizeof(goodbye_world));
+	assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
+
+	lzma_end(&strm);
+}
+
+
+// This tests decoding when MicroLZMA decoder is called with
+// an incorrect uncompressed size: one byte too big (both without and
+// with uncomp_size_is_exact) and one byte too small (with
+// uncomp_size_is_exact).
+static void
+test_decode_uncomp_size_wrong(void)
+{
+	lzma_stream strm = LZMA_STREAM_INIT;
+	assert_lzma_ret(lzma_microlzma_decoder(&strm,
+			goodbye_world_encoded_size,
+			sizeof(goodbye_world) + 1, false,
+			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
+
+	uint8_t output[BUFFER_SIZE];
+
+	strm.next_in = goodbye_world_encoded;
+	strm.avail_in = goodbye_world_encoded_size;
+	strm.next_out = output;
+	strm.avail_out = sizeof(output);
+
+	// LZMA_OK should be returned because the uncompressed size given
+	// was larger than the real uncompressed size. The decoder is
+	// expecting more input to possibly fill the uncompressed size
+	// that was set.
+	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK);
+
+	assert_uint_eq(strm.total_out, sizeof(goodbye_world));
+
+	assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
+
+	// Next, test with uncomp_size_is_exact set.
+	assert_lzma_ret(lzma_microlzma_decoder(&strm,
+			goodbye_world_encoded_size,
+			sizeof(goodbye_world) + 1, true,
+			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
+
+	strm.next_in = goodbye_world_encoded;
+	strm.avail_in = goodbye_world_encoded_size;
+	strm.next_out = output;
+	strm.avail_out = sizeof(output);
+
+	// No error detected, even though all input was consumed and there
+	// is more room in the output buffer.
+	//
+	// FIXME? LZMA_FINISH tells that no more input is coming and
+	// the MicroLZMA decoder knows the exact compressed size from
+	// the initialization as well. So should it return LZMA_DATA_ERROR
+	// on the first call instead of relying on the generic lzma_code()
+	// logic to eventually get LZMA_BUF_ERROR?
+	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK);
+	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_OK);
+	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_BUF_ERROR);
+
+	assert_uint_eq(strm.total_out, sizeof(goodbye_world));
+	assert_array_eq(goodbye_world, output, sizeof(goodbye_world));
+
+	// Reset stream with uncomp_size one byte smaller than the real
+	// uncompressed size. The size is derived from goodbye_world,
+	// the data that was actually encoded, instead of from the
+	// unrelated hello_world array.
+	assert_lzma_ret(lzma_microlzma_decoder(&strm,
+			goodbye_world_encoded_size,
+			sizeof(goodbye_world) - 1, true,
+			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
+
+	strm.next_in = goodbye_world_encoded;
+	strm.avail_in = goodbye_world_encoded_size;
+	strm.next_out = output;
+	strm.avail_out = sizeof(output);
+
+	// This case actually results in an error since it decodes the full
+	// uncompressed size but the range coder is not in the proper state
+	// for the stream to end.
+	assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR);
+
+	lzma_end(&strm);
+}
+
+
+// Tests decoding when the compressed size given to the MicroLZMA decoder
+// is one byte bigger than the real compressed size, first with
+// uncomp_size_is_exact set and then without it.
+static void
+test_decode_comp_size_wrong(void)
+{
+	uint8_t output[BUFFER_SIZE];
+	lzma_stream strm = LZMA_STREAM_INIT;
+
+	// Claiming goodbye_world_encoded_size + 1 is safe because the
+	// buffer of goodbye_world_encoded was allocated with extra space.
+	// That space is uninitialized but it isn't supposed to be read
+	// either, so Valgrind has to remain happy with this code.
+	assert_lzma_ret(lzma_microlzma_decoder(&strm,
+			goodbye_world_encoded_size + 1,
+			sizeof(goodbye_world), true,
+			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
+
+	strm.next_out = output;
+	strm.avail_out = sizeof(output);
+	strm.next_in = goodbye_world_encoded;
+	strm.avail_in = goodbye_world_encoded_size;
+
+	// With uncomp_size_is_exact set, a wrong compressed size has to
+	// result in LZMA_DATA_ERROR.
+	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_DATA_ERROR);
+
+	// Same compressed size again but without uncomp_size_is_exact.
+	assert_lzma_ret(lzma_microlzma_decoder(&strm,
+			goodbye_world_encoded_size + 1,
+			sizeof(goodbye_world), false,
+			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
+
+	strm.next_out = output;
+	strm.avail_out = sizeof(output);
+	strm.next_in = goodbye_world_encoded;
+	strm.avail_in = goodbye_world_encoded_size;
+
+	// Without uncomp_size_is_exact, the wrong compressed size goes
+	// unnoticed as long as all of the expected output has been
+	// decoded. The decoder assumes that the real uncompressed size
+	// might be bigger than the specified value, in which case more
+	// input might be needed as well.
+	assert_lzma_ret(lzma_code(&strm, LZMA_FINISH), LZMA_STREAM_END);
+
+	lzma_end(&strm);
+}
+
+
+// Tests decoding when the first byte of the MicroLZMA stream has been
+// altered to describe LZMA properties that are invalid, or valid but
+// different from the ones used for encoding.
+static void
+test_decode_bad_lzma_properties(void)
+{
+	uint8_t output[BUFFER_SIZE];
+	lzma_stream strm = LZMA_STREAM_INIT;
+
+	// Work on a private copy so that the shared encoded data stays
+	// intact; only the first byte of the copy is modified.
+	uint8_t *compressed = tuktest_malloc(goodbye_world_encoded_size);
+	memcpy(compressed, goodbye_world_encoded, goodbye_world_encoded_size);
+
+	// Invalid properties: lc=3, lp=2, pb=2
+	compressed[0] = (uint8_t)~0x6FU;
+
+	assert_lzma_ret(lzma_microlzma_decoder(&strm,
+			goodbye_world_encoded_size,
+			sizeof(goodbye_world), false,
+			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
+
+	strm.next_out = output;
+	strm.avail_out = sizeof(output);
+	strm.next_in = compressed;
+	strm.avail_in = goodbye_world_encoded_size;
+
+	assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_OPTIONS_ERROR);
+
+	// Valid properties that differ from the ones used when encoding:
+	// lc=3, lp=1, pb=2
+	compressed[0] = (uint8_t)~0x66U;
+
+	assert_lzma_ret(lzma_microlzma_decoder(&strm,
+			goodbye_world_encoded_size,
+			ARRAY_SIZE(goodbye_world), true,
+			LZMA_DICT_SIZE_DEFAULT), LZMA_OK);
+
+	strm.next_out = output;
+	strm.avail_out = sizeof(output);
+	strm.next_in = compressed;
+	strm.avail_in = goodbye_world_encoded_size;
+
+	assert_lzma_ret(lzma_code(&strm, LZMA_RUN), LZMA_DATA_ERROR);
+
+	lzma_end(&strm);
+}
+#endif
+
+
+// Test program entry point. Runs the encoder tests, followed by the
+// decoder tests if the LZMA1 decoder is available too. Without the LZMA1
+// encoder everything is skipped.
+extern int
+main(int argc, char **argv)
+{
+	tuktest_start(argc, argv);
+
+#ifdef HAVE_ENCODER_LZMA1
+	tuktest_run(test_encode_options);
+	tuktest_run(test_encode_basic);
+	tuktest_run(test_encode_small_out);
+	tuktest_run(test_encode_actions);
+
+	// The decoder tests depend on basic encoder functionality because
+	// their input is produced here with the MicroLZMA encoder.
+#	ifdef HAVE_DECODER_LZMA1
+	goodbye_world_encoded_size = basic_microlzma_encode(goodbye_world,
+			sizeof(goodbye_world), &goodbye_world_encoded);
+
+	tuktest_run(test_decode_options);
+	tuktest_run(test_decode_uncomp_size_is_exact);
+	tuktest_run(test_decode_uncomp_size_wrong);
+	tuktest_run(test_decode_comp_size_wrong);
+	tuktest_run(test_decode_bad_lzma_properties);
+#	endif
+
+	return tuktest_end();
+#else
+	tuktest_early_skip("LZMA1 encoder disabled");
+#endif
+}