diff options
Diffstat (limited to 'lib/compression')
-rw-r--r-- | lib/compression/lzxpress.c | 347 | ||||
-rw-r--r-- | lib/compression/lzxpress.h | 50 | ||||
-rw-r--r-- | lib/compression/testsuite.c | 483 | ||||
-rw-r--r-- | lib/compression/wscript_build | 6 |
4 files changed, 886 insertions, 0 deletions
diff --git a/lib/compression/lzxpress.c b/lib/compression/lzxpress.c new file mode 100644 index 0000000..6b2aeef --- /dev/null +++ b/lib/compression/lzxpress.c @@ -0,0 +1,347 @@ +/* + * Copyright (C) Matthieu Suiche 2008 + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include "replace.h" +#include "lzxpress.h" +#include "../lib/util/byteorder.h" + + +#define __CHECK_BYTES(__size, __index, __needed) do { \ + if (unlikely(__index >= __size)) { \ + return -1; \ + } else { \ + uint32_t __avail = __size - __index; \ + if (unlikely(__needed > __avail)) { \ + return -1; \ + } \ + } \ +} while(0) + +#define CHECK_INPUT_BYTES(__needed) \ + __CHECK_BYTES(uncompressed_size, uncompressed_pos, __needed) +#define CHECK_OUTPUT_BYTES(__needed) \ + __CHECK_BYTES(max_compressed_size, compressed_pos, __needed) + +ssize_t lzxpress_compress(const uint8_t *uncompressed, + uint32_t uncompressed_size, + uint8_t *compressed, + uint32_t max_compressed_size) +{ + /* + * This is the algorithm in [MS-XCA] 2.3 "Plain LZ77 Compression". + * + * It avoids Huffman encoding by including literal bytes inline when a + * match is not found. Every so often it includes a uint32 bit map + * flagging which positions contain matches and which contain + * literals. The encoding of matches is of variable size, depending on + * the match length; they are always at least 16 bits long, and can + * implicitly use unused half-bytes from earlier in the stream. + */ + uint32_t uncompressed_pos, compressed_pos; + uint32_t indic; + uint32_t indic_pos; + uint32_t indic_bit, nibble_index; + + if (!uncompressed_size) { + return 0; + } + + uncompressed_pos = 0; + compressed_pos = 0; + indic = 0; + CHECK_OUTPUT_BYTES(sizeof(uint32_t)); + PUSH_LE_U32(compressed, compressed_pos, 0); + compressed_pos += sizeof(uint32_t); + indic_pos = 0; + + indic_bit = 0; + nibble_index = 0; + + while ((uncompressed_pos < uncompressed_size) && + (compressed_pos < max_compressed_size)) { + bool found = false; + + uint32_t best_len = 2; + uint32_t best_offset = 0; + + int32_t offset; + + const uint32_t max_offset = MIN(0x2000, uncompressed_pos); + /* maximum len we can encode into metadata */ + const uint32_t max_len = MIN(0xFFFF + 3, uncompressed_size - uncompressed_pos); + + /* search for the longest match in the window for the lookahead buffer */ + for (offset = 1; (uint32_t)offset <= max_offset; offset++) { + uint32_t len; + + for (len = 0; + (len < max_len) && (uncompressed[uncompressed_pos + len] == + uncompressed[uncompressed_pos + len - offset]); + len++); + + /* + * We check if len is better than the value found before, including the + * sequence of identical bytes + */ + if (len > best_len) { + found = true; + best_len = len; + best_offset = offset; + if (best_len == max_len) { + /* We're not going to do better than this */ + break; + } + } + } + + if (!found) { + /* + * This is going to literal byte, which we flag by + * setting a bit in an indicator field somewhere + * earlier in the stream. + */ + CHECK_INPUT_BYTES(sizeof(uint8_t)); + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + compressed[compressed_pos++] = uncompressed[uncompressed_pos++]; + + indic <<= 1; + indic_bit += 1; + + if (indic_bit == 32) { + PUSH_LE_U32(compressed, indic_pos, indic); + indic_bit = 0; + CHECK_OUTPUT_BYTES(sizeof(uint32_t)); + indic_pos = compressed_pos; + compressed_pos += sizeof(uint32_t); + } + } else { + uint32_t match_len = best_len; + + uint16_t metadata; + + match_len -= 3; + best_offset -= 1; + + /* Classical meta-data */ + CHECK_OUTPUT_BYTES(sizeof(uint16_t)); + metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7)); + PUSH_LE_U16(compressed, compressed_pos, metadata); + compressed_pos += sizeof(uint16_t); + + if (match_len >= 7) { + match_len -= 7; + + if (!nibble_index) { + nibble_index = compressed_pos; + + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + compressed[nibble_index] = MIN(match_len, 15); + compressed_pos += sizeof(uint8_t); + } else { + compressed[nibble_index] |= MIN(match_len, 15) << 4; + nibble_index = 0; + } + + if (match_len >= 15) { + match_len -= 15; + + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + compressed[compressed_pos] = MIN(match_len, 255); + compressed_pos += sizeof(uint8_t); + + if (match_len >= 255) { + /* Additional match_len */ + + match_len += 7 + 15; + + if (match_len < (1 << 16)) { + CHECK_OUTPUT_BYTES(sizeof(uint16_t)); + PUSH_LE_U16(compressed, compressed_pos, match_len); + compressed_pos += sizeof(uint16_t); + } else { + CHECK_OUTPUT_BYTES(sizeof(uint16_t) + sizeof(uint32_t)); + PUSH_LE_U16(compressed, compressed_pos, 0); + compressed_pos += sizeof(uint16_t); + + PUSH_LE_U32(compressed, compressed_pos, match_len); + compressed_pos += sizeof(uint32_t); + } + } + } + } + + indic = (indic << 1) | 1; + indic_bit += 1; + + if (indic_bit == 32) { + PUSH_LE_U32(compressed, indic_pos, indic); + indic_bit = 0; + CHECK_OUTPUT_BYTES(sizeof(uint32_t)); + indic_pos = compressed_pos; + compressed_pos += sizeof(uint32_t); + } + + uncompressed_pos += best_len; + } + } + + if (indic_bit != 0) { + indic <<= 32 - indic_bit; + } + indic |= UINT32_MAX >> indic_bit; + PUSH_LE_U32(compressed, indic_pos, indic); + + return compressed_pos; +} + +ssize_t lzxpress_decompress(const uint8_t *input, + uint32_t input_size, + uint8_t *output, + uint32_t max_output_size) +{ + /* + * This is the algorithm in [MS-XCA] 2.4 "Plain LZ77 Decompression + * Algorithm Details". + */ + uint32_t output_index, input_index; + uint32_t indicator, indicator_bit; + uint32_t nibble_index; + + if (input_size == 0) { + return 0; + } + + output_index = 0; + input_index = 0; + indicator = 0; + indicator_bit = 0; + nibble_index = 0; + +#undef CHECK_INPUT_BYTES +#define CHECK_INPUT_BYTES(__needed) \ + __CHECK_BYTES(input_size, input_index, __needed) +#undef CHECK_OUTPUT_BYTES +#define CHECK_OUTPUT_BYTES(__needed) \ + __CHECK_BYTES(max_output_size, output_index, __needed) + + do { + if (indicator_bit == 0) { + CHECK_INPUT_BYTES(sizeof(uint32_t)); + indicator = PULL_LE_U32(input, input_index); + input_index += sizeof(uint32_t); + if (input_index == input_size) { + /* + * The compressor left room for indicator + * flags for data that doesn't exist. + */ + break; + } + indicator_bit = 32; + } + indicator_bit--; + + /* + * check whether the bit specified by indicator_bit is set or not + * set in indicator. For example, if indicator_bit has value 4 + * check whether the 4th bit of the value in indicator is set + */ + if (((indicator >> indicator_bit) & 1) == 0) { + CHECK_INPUT_BYTES(sizeof(uint8_t)); + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + output[output_index] = input[input_index]; + input_index += sizeof(uint8_t); + output_index += sizeof(uint8_t); + } else { + uint32_t length; + uint32_t offset; + + CHECK_INPUT_BYTES(sizeof(uint16_t)); + length = PULL_LE_U16(input, input_index); + input_index += sizeof(uint16_t); + offset = (length >> 3) + 1; + length &= 7; + + if (length == 7) { + if (nibble_index == 0) { + CHECK_INPUT_BYTES(sizeof(uint8_t)); + nibble_index = input_index; + length = input[input_index] & 0xf; + input_index += sizeof(uint8_t); + } else { + length = input[nibble_index] >> 4; + nibble_index = 0; + } + + if (length == 15) { + CHECK_INPUT_BYTES(sizeof(uint8_t)); + length = input[input_index]; + input_index += sizeof(uint8_t); + if (length == 255) { + CHECK_INPUT_BYTES(sizeof(uint16_t)); + length = PULL_LE_U16(input, input_index); + input_index += sizeof(uint16_t); + if (length == 0) { + CHECK_INPUT_BYTES(sizeof(uint32_t)); + length = PULL_LE_U32(input, input_index); + input_index += sizeof(uint32_t); + } + + if (length < (15 + 7)) { + return -1; + } + length -= (15 + 7); + } + length += 15; + } + length += 7; + } + length += 3; + + if (length == 0) { + return -1; + } + + for (; length > 0; --length) { + if (offset > output_index) { + return -1; + } + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + output[output_index] = output[output_index - offset]; + output_index += sizeof(uint8_t); + } + } + } while ((output_index < max_output_size) && (input_index < (input_size))); + + return output_index; +} diff --git a/lib/compression/lzxpress.h b/lib/compression/lzxpress.h new file mode 100644 index 0000000..df0ee59 --- /dev/null +++ b/lib/compression/lzxpress.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) Matthieu Suiche 2008 + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#ifndef _LZXPRESS_H +#define _LZXPRESS_H + +#define XPRESS_BLOCK_SIZE 0x10000 + +ssize_t lzxpress_compress(const uint8_t *uncompressed, + uint32_t uncompressed_size, + uint8_t *compressed, + uint32_t max_compressed_size); + +ssize_t lzxpress_decompress(const uint8_t *input, + uint32_t input_size, + uint8_t *output, + uint32_t max_output_size); + +#endif /* _LZXPRESS_H */ diff --git a/lib/compression/testsuite.c b/lib/compression/testsuite.c new file mode 100644 index 0000000..d4dc41b --- /dev/null +++ b/lib/compression/testsuite.c @@ -0,0 +1,483 @@ +/* + Unix SMB/CIFS implementation. + test suite for the compression functions + + Copyright (C) Jelmer Vernooij 2007 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" +#include "torture/torture.h" +#include "torture/local/proto.h" +#include "talloc.h" +#include "lzxpress.h" +#include "lib/util/base64.h" + +/* Tests based on [MS-XCA] 3.1 Examples */ +static bool test_msft_data1( + struct torture_context *test +) +{ + TALLOC_CTX *tmp_ctx = talloc_new(test); + + const char *fixed_data = "abcdefghijklmnopqrstuvwxyz"; + const uint8_t fixed_out[] = { + 0x3f, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64, + 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, + 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, + 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a }; + + ssize_t c_size; + uint8_t *out, *out2; + + out = talloc_size(tmp_ctx, 2048); + memset(out, 0x42, talloc_get_size(out)); + + torture_comment(test, "lzxpress fixed compression\n"); + c_size = lzxpress_compress((const uint8_t *)fixed_data, + strlen(fixed_data), + out, + talloc_get_size(out)); + + torture_assert_int_equal(test, c_size, sizeof(fixed_out), + "fixed lzxpress_compress size"); + torture_assert_mem_equal(test, out, fixed_out, c_size, + "fixed lzxpress_compress data"); + + torture_comment(test, "lzxpress fixed decompression\n"); + out2 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(out, + sizeof(fixed_out), + out2, + talloc_get_size(out2)); + + torture_assert_int_equal(test, c_size, strlen(fixed_data), + "fixed lzxpress_decompress size"); + torture_assert_mem_equal(test, out2, fixed_data, c_size, + "fixed lzxpress_decompress data"); + + talloc_free(tmp_ctx); + return true; +} + + +static bool test_msft_data2( + struct torture_context *test +) +{ + TALLOC_CTX *tmp_ctx = talloc_new(test); + + const char *fixed_data = + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + "abcabcabcabcabcabcabcabc"; + const uint8_t fixed_out[] = { + 0xff, 0xff, 0xff, 0x1f, 0x61, 0x62, 0x63, 0x17, + 0x00, 0x0f, 0xff, 0x26, 0x01}; + + ssize_t c_size; + uint8_t *out, *out2; + + out = talloc_size(tmp_ctx, 2048); + memset(out, 0x42, talloc_get_size(out)); + + torture_comment(test, "lzxpress fixed compression\n"); + c_size = lzxpress_compress((const uint8_t *)fixed_data, + strlen(fixed_data), + out, + talloc_get_size(out)); + + torture_assert_int_equal(test, c_size, sizeof(fixed_out), + "fixed lzxpress_compress size"); + torture_assert_mem_equal(test, out, fixed_out, c_size, + "fixed lzxpress_compress data"); + + torture_comment(test, "lzxpress fixed decompression\n"); + out2 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(out, + sizeof(fixed_out), + out2, + talloc_get_size(out2)); + + torture_comment(test, "out2: %.*s\n", (int)c_size, (char *)out2); + + torture_assert_int_equal(test, c_size, strlen(fixed_data), + "fixed lzxpress_decompress size"); + torture_assert_mem_equal(test, out2, fixed_data, c_size, + "fixed lzxpress_decompress data"); + + talloc_free(tmp_ctx); + return true; +} + +/* + test lzxpress + */ +static bool test_lzxpress(struct torture_context *test) +{ + TALLOC_CTX *tmp_ctx = talloc_new(test); + const char *fixed_data = "this is a test. and this is a test too"; + const uint8_t fixed_out[] = { + 0xff, 0x21, 0x00, 0x04, 0x74, 0x68, 0x69, 0x73, + 0x20, 0x10, 0x00, 0x61, 0x20, 0x74, 0x65, 0x73, + 0x74, 0x2E, 0x20, 0x61, 0x6E, 0x64, 0x20, 0x9F, + 0x00, 0x04, 0x20, 0x74, 0x6F, 0x6F }; + + const uint8_t fixed_out_old_version[] = { + 0x00, 0x20, 0x00, 0x04, 0x74, 0x68, 0x69, 0x73, + 0x20, 0x10, 0x00, 0x61, 0x20, 0x74, 0x65, 0x73, + 0x74, 0x2E, 0x20, 0x61, 0x6E, 0x64, 0x20, 0x9F, + 0x00, 0x04, 0x20, 0x74, 0x6F, 0x6F, 0x00, 0x00, + 0x00, 0x00 }; + + ssize_t c_size; + uint8_t *out, *out2, *out3; + + out = talloc_size(tmp_ctx, 2048); + memset(out, 0x42, talloc_get_size(out)); + + torture_comment(test, "lzxpress fixed compression\n"); + c_size = lzxpress_compress((const uint8_t *)fixed_data, + strlen(fixed_data), + out, + talloc_get_size(out)); + + torture_assert_int_equal(test, c_size, sizeof(fixed_out), + "fixed lzxpress_compress size"); + torture_assert_mem_equal(test, out, fixed_out, c_size, + "fixed lzxpress_compress data"); + + torture_comment(test, "lzxpress fixed decompression\n"); + out2 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(out, + sizeof(fixed_out), + out2, + talloc_get_size(out2)); + + torture_assert_int_equal(test, c_size, strlen(fixed_data), + "fixed lzxpress_decompress size"); + torture_assert_mem_equal(test, out2, fixed_data, c_size, + "fixed lzxpress_decompress data"); + + + torture_comment(test, "lzxpress fixed decompression (old data)\n"); + out3 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(fixed_out_old_version, + sizeof(fixed_out_old_version), + out3, + talloc_get_size(out3)); + + torture_assert_int_equal(test, c_size, strlen(fixed_data), + "fixed lzxpress_decompress size"); + torture_assert_mem_equal(test, out3, fixed_data, c_size, + "fixed lzxpress_decompress data"); + + talloc_free(tmp_ctx); + return true; +} + +static bool test_lzxpress2(struct torture_context *test) +{ + /* + * Use two matches, separated by a literal, and each with a length + * greater than 10, to test the use of nibble_index. Both length values + * (less ten) should be stored as adjacent nibbles to form the 0x21 + * byte. + */ + + TALLOC_CTX *tmp_ctx = talloc_new(test); + const char *fixed_data = "aaaaaaaaaaaabaaaaaaaaaaaa"; + const uint8_t fixed_out[] = { + 0xff, 0xff, 0xff, 0x5f, 0x61, 0x07, 0x00, 0x21, + 0x62, 0x67, 0x00}; + + ssize_t c_size; + uint8_t *out, *out2; + + out = talloc_size(tmp_ctx, 2048); + memset(out, 0x42, talloc_get_size(out)); + + torture_comment(test, "lzxpress fixed compression\n"); + c_size = lzxpress_compress((const uint8_t *)fixed_data, + strlen(fixed_data), + out, + talloc_get_size(out)); + + torture_assert_int_equal(test, c_size, sizeof(fixed_out), + "fixed lzxpress_compress size"); + torture_assert_mem_equal(test, out, fixed_out, c_size, + "fixed lzxpress_compress data"); + + torture_comment(test, "lzxpress fixed decompression\n"); + out2 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(out, + sizeof(fixed_out), + out2, + talloc_get_size(out2)); + + torture_assert_int_equal(test, c_size, strlen(fixed_data), + "fixed lzxpress_decompress size"); + torture_assert_mem_equal(test, out2, fixed_data, c_size, + "fixed lzxpress_decompress data"); + + talloc_free(tmp_ctx); + return true; +} + +static bool test_lzxpress3(struct torture_context *test) +{ + /* + * Use a series of 31 literals, followed by a single minimum-length + * match (and a terminating literal), to test setting indic_pos when the + * 32-bit flags value overflows after a match. + */ + + TALLOC_CTX *tmp_ctx = talloc_new(test); + const char *fixed_data = "abcdefghijklmnopqrstuvwxyz01234abca"; + const uint8_t fixed_out[] = { + 0x01, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64, + 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, + 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, + 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x30, 0x31, + 0x32, 0x33, 0x34, 0xf0, 0x00, 0xff, 0xff, 0xff, + 0x7f, 0x61}; + + ssize_t c_size; + uint8_t *out, *out2; + + out = talloc_size(tmp_ctx, 2048); + memset(out, 0x42, talloc_get_size(out)); + + torture_comment(test, "lzxpress fixed compression\n"); + c_size = lzxpress_compress((const uint8_t *)fixed_data, + strlen(fixed_data), + out, + talloc_get_size(out)); + + torture_assert_int_equal(test, c_size, sizeof(fixed_out), + "fixed lzxpress_compress size"); + torture_assert_mem_equal(test, out, fixed_out, c_size, + "fixed lzxpress_compress data"); + + torture_comment(test, "lzxpress fixed decompression\n"); + out2 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(out, + sizeof(fixed_out), + out2, + talloc_get_size(out2)); + + torture_assert_int_equal(test, c_size, strlen(fixed_data), + "fixed lzxpress_decompress size"); + torture_assert_mem_equal(test, out2, fixed_data, c_size, + "fixed lzxpress_decompress data"); + + talloc_free(tmp_ctx); + return true; +} + +static bool test_lzxpress4(struct torture_context *test) +{ + /* + * Use a series of 31 literals, followed by a single minimum-length + * match, to test that the final set of 32-bit flags is written + * correctly when it is empty. + */ + + TALLOC_CTX *tmp_ctx = talloc_new(test); + const char *fixed_data = "abcdefghijklmnopqrstuvwxyz01234abc"; + const uint8_t fixed_out[] = { + 0x01, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64, + 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, + 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, + 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x30, 0x31, + 0x32, 0x33, 0x34, 0xf0, 0x00, 0xff, 0xff, 0xff, + 0xff}; + + ssize_t c_size; + uint8_t *out, *out2; + + out = talloc_size(tmp_ctx, 2048); + memset(out, 0x42, talloc_get_size(out)); + + torture_comment(test, "lzxpress fixed compression\n"); + c_size = lzxpress_compress((const uint8_t *)fixed_data, + strlen(fixed_data), + out, + talloc_get_size(out)); + + torture_assert_int_equal(test, c_size, sizeof(fixed_out), + "fixed lzxpress_compress size"); + torture_assert_mem_equal(test, out, fixed_out, c_size, + "fixed lzxpress_compress data"); + + torture_comment(test, "lzxpress fixed decompression\n"); + out2 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(out, + sizeof(fixed_out), + out2, + talloc_get_size(out2)); + + torture_assert_int_equal(test, c_size, strlen(fixed_data), + "fixed lzxpress_decompress size"); + torture_assert_mem_equal(test, out2, fixed_data, c_size, + "fixed lzxpress_decompress data"); + + talloc_free(tmp_ctx); + return true; +} + + +static bool test_lzxpress_many_zeros(struct torture_context *test) +{ + /* + * Repeated values (zero is convenient but not special) will lead to + * very long substring searches in compression, which can be very slow + * if we're not careful. + * + * This test makes a very loose assertion about how long it should + * take to compress a million zeros. + * + * Wall clock time *should* be < 0.1 seconds with the fix and around a + * minute without it. We try for CLOCK_THREAD_CPUTIME_ID which should + * filter out some noise on the machine, and set the threshold at 5 + * seconds. + */ + + TALLOC_CTX *tmp_ctx = talloc_new(test); + const size_t N_ZEROS = 1000000; + const uint8_t *zeros = talloc_zero_size(tmp_ctx, N_ZEROS); + const ssize_t expected_c_size = 93; + ssize_t c_size; + uint8_t *comp, *decomp; + static struct timespec t_start, t_end; + uint64_t elapsed_ns; + + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_start) != 0) { + if (clock_gettime(CUSTOM_CLOCK_MONOTONIC, &t_start) != 0) { + clock_gettime(CLOCK_REALTIME, &t_start); + } + } + + comp = talloc_zero_size(tmp_ctx, 2048); + + c_size = lzxpress_compress(zeros, + N_ZEROS, + comp, + talloc_get_size(comp)); + + torture_assert_int_equal(test, c_size, expected_c_size, + "fixed lzxpress_compress size"); + + decomp = talloc_size(tmp_ctx, N_ZEROS * 2); + c_size = lzxpress_decompress(comp, + c_size, + decomp, + N_ZEROS * 2); + + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_end) != 0) { + if (clock_gettime(CUSTOM_CLOCK_MONOTONIC, &t_end) != 0) { + clock_gettime(CLOCK_REALTIME, &t_end); + } + } + elapsed_ns = ( + (t_end.tv_sec - t_start.tv_sec) * 1000U * 1000U * 1000U) + + (t_end.tv_nsec - t_start.tv_nsec); + torture_comment(test, "round-trip time: %"PRIu64" ns\n", elapsed_ns); + torture_assert(test, elapsed_ns < 3 * 1000U * 1000U * 1000U, + "million zeros round trip tool > 3 seconds"); + torture_assert_mem_equal(test, decomp, zeros, N_ZEROS, + "fixed lzxpress_decompress data"); + + talloc_free(tmp_ctx); + return true; +} + + +static bool test_lzxpress_round_trip(struct torture_context *test) +{ + /* + * Examples found using via fuzzing. + */ + TALLOC_CTX *tmp_ctx = talloc_new(test); + size_t i; + struct b64_pair { + const char *uncompressed; + const char *compressed; + } pairs[] = { + { /* this results in a trailing flags block */ + "AAICAmq/EKdP785YU2Ddh7d4vUtdlQyLeHV09LHpUBw=", + "AAAAAAACAgJqvxCnT+/OWFNg3Ye3eL1LXZUMi3h1dPSx6VAc/////w==", + }, + { /* empty string compresses to empty string */ + "", "" + }, + }; + const size_t alloc_size = 1000; + uint8_t *data = talloc_array(tmp_ctx, uint8_t, alloc_size); + + for (i = 0; i < ARRAY_SIZE(pairs); i++) { + ssize_t len; + DATA_BLOB uncomp = base64_decode_data_blob_talloc( + tmp_ctx, + pairs[i].uncompressed); + DATA_BLOB comp = base64_decode_data_blob_talloc( + tmp_ctx, + pairs[i].compressed); + + len = lzxpress_compress(uncomp.data, + uncomp.length, + data, + alloc_size); + + torture_assert_int_equal(test, len, comp.length, + "lzexpress compression size"); + + torture_assert_mem_equal(test, comp.data, data, len, + "lzxpress compression data"); + + len = lzxpress_decompress(comp.data, + comp.length, + data, + alloc_size); + + torture_assert_int_equal(test, len, uncomp.length, + "lzexpress decompression size"); + + torture_assert_mem_equal(test, uncomp.data, data, len, + "lzxpress decompression data"); + } + talloc_free(tmp_ctx); + return true; +} + + +struct torture_suite *torture_local_compression(TALLOC_CTX *mem_ctx) +{ + struct torture_suite *suite = torture_suite_create(mem_ctx, "compression"); + + torture_suite_add_simple_test(suite, "lzxpress", test_lzxpress); + torture_suite_add_simple_test(suite, "lzxpress_msft_data1", test_msft_data1); + torture_suite_add_simple_test(suite, "lzxpress_msft_data2", test_msft_data2); + torture_suite_add_simple_test(suite, "lzxpress2", test_lzxpress2); + torture_suite_add_simple_test(suite, "lzxpress3", test_lzxpress3); + torture_suite_add_simple_test(suite, "lzxpress4", test_lzxpress4); + torture_suite_add_simple_test(suite, "lzxpress_many_zeros", + test_lzxpress_many_zeros); + torture_suite_add_simple_test(suite, "lzxpress_round_trip", + test_lzxpress_round_trip); + return suite; +} diff --git a/lib/compression/wscript_build b/lib/compression/wscript_build new file mode 100644 index 0000000..7ad5333 --- /dev/null +++ b/lib/compression/wscript_build @@ -0,0 +1,6 @@ +#!/usr/bin/env python + +bld.SAMBA_SUBSYSTEM('LZXPRESS', + deps='replace', + source='lzxpress.c' + ) |