From 8daa83a594a2e98f39d764422bfbdbc62c9efd44 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 19:20:00 +0200 Subject: Adding upstream version 2:4.20.0+dfsg. Signed-off-by: Daniel Baumann --- lib/compression/lzxpress.c | 507 +++++ lib/compression/lzxpress.h | 50 + lib/compression/lzxpress_huffman.c | 2045 ++++++++++++++++++++ lib/compression/lzxpress_huffman.h | 95 + lib/compression/pycompression.c | 304 +++ lib/compression/tests/scripts/README | 19 + .../tests/scripts/decode-huffman-header | 54 + .../tests/scripts/generate-windows-test-vectors.c | 206 ++ lib/compression/tests/scripts/make-fuzz-examples | 45 + lib/compression/tests/scripts/make-test-vectors | 185 ++ lib/compression/tests/scripts/three-byte-hash | 49 + lib/compression/tests/test_lzx_huffman.c | 1255 ++++++++++++ lib/compression/tests/test_lzxpress_plain.c | 1194 ++++++++++++ lib/compression/wscript_build | 25 + 14 files changed, 6033 insertions(+) create mode 100644 lib/compression/lzxpress.c create mode 100644 lib/compression/lzxpress.h create mode 100644 lib/compression/lzxpress_huffman.c create mode 100644 lib/compression/lzxpress_huffman.h create mode 100644 lib/compression/pycompression.c create mode 100644 lib/compression/tests/scripts/README create mode 100755 lib/compression/tests/scripts/decode-huffman-header create mode 100644 lib/compression/tests/scripts/generate-windows-test-vectors.c create mode 100755 lib/compression/tests/scripts/make-fuzz-examples create mode 100755 lib/compression/tests/scripts/make-test-vectors create mode 100755 lib/compression/tests/scripts/three-byte-hash create mode 100644 lib/compression/tests/test_lzx_huffman.c create mode 100644 lib/compression/tests/test_lzxpress_plain.c create mode 100644 lib/compression/wscript_build (limited to 'lib/compression') diff --git a/lib/compression/lzxpress.c b/lib/compression/lzxpress.c new file mode 100644 index 0000000..5e5e5ba --- /dev/null +++ b/lib/compression/lzxpress.c @@ -0,0 +1,507 @@ +/* + * 
Copyright (C) Matthieu Suiche 2008 + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ */
+
+#include "replace.h"
+#include "lzxpress.h"
+#include "../lib/util/byteorder.h"
+
+
+#define __CHECK_BYTES(__size, __index, __needed) do { \
+	if (unlikely(__index >= __size)) { \
+		return -1; \
+	} else { \
+		uint32_t __avail = __size - __index; \
+		if (unlikely(__needed > __avail)) { \
+			return -1; \
+		} \
+	} \
+} while(0)
+
+
+/*
+ * LZX_PLAIN_COMP_HASH_BITS determines how big the hash table for finding
+ * matches will be.
+ *
+ * The window in which we look for matches is 8192 bytes. That means with
+ * random data a value of 13 is getting close to no collisions, while a 12
+ * will miss about half the possible matches. With compressible data there
+ * will generally be fewer and less diverse entries, so collisions are rarer.
+ *
+ * In the testsuite, both 12 and 13 give better compression than Windows, but
+ * 12 is faster. 11 does not save time and costs accuracy. Thus we prefer 12.
+ */
+#define LZX_PLAIN_COMP_HASH_BITS 12
+/*
+ * LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS is how far ahead to search in the
+ * circular hash table for a match, before we give up. A bigger number will
+ * generally lead to better but slower compression, but a stupidly big number
+ * will just be worse.
+ */ +#define LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS 5 +#define HASH_MASK ((1 << LZX_PLAIN_COMP_HASH_BITS) - 1) + +static inline uint16_t three_byte_hash(const uint8_t *bytes) +{ + uint16_t a = bytes[0]; + uint16_t b = bytes[1] ^ 0x2e; + uint16_t c = bytes[2] ^ 0x55; + uint16_t ca = c - a; + uint16_t d = ((a + b) << 8) ^ (ca << 5) ^ (c + b) ^ (0xcab + a); + return d & HASH_MASK; +} + + +static inline void store_match(uint32_t *hash_table, + uint16_t h, + uint32_t offset) +{ + int i; + uint32_t o = hash_table[h]; + uint16_t h2; + uint16_t worst_h; + int worst_score; + + if (o >= offset) { + /* there is nothing there yet */ + hash_table[h] = offset; + return; + } + for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) { + h2 = (h + i) & HASH_MASK; + if (hash_table[h2] >= offset) { + hash_table[h2] = offset; + return; + } + } + /* + * There are no slots, but we really want to store this, so we'll kick + * out the one with the longest distance. + */ + worst_h = h; + worst_score = offset - o; + for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) { + int score; + h2 = (h + i) & HASH_MASK; + o = hash_table[h2]; + score = offset - o; + if (score > worst_score) { + worst_score = score; + worst_h = h2; + } + } + hash_table[worst_h] = offset; +} + + +struct match { + const uint8_t *there; + uint32_t length; +}; + + +static inline struct match lookup_match(uint32_t *hash_table, + uint16_t h, + const uint8_t *data, + uint32_t offset, + size_t max_len) +{ + int i; + uint32_t o; + uint16_t h2; + size_t len; + const uint8_t *there = NULL; + const uint8_t *here = data + offset; + struct match best = {0}; + + for (i = 0; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) { + h2 = (h + i) & HASH_MASK; + o = hash_table[h2]; + if (o >= offset) { + /* + * Either this is 0xffffffff, or something is really + * wrong. + * + * In setting this, we would never have stepped over + * an 0xffffffff, so we won't now. 
+ */ + break; + } + if (offset - o > 8192) { + /* Too far away to use */ + continue; + } + there = data + o; + /* + * When we already have a long match, we can try to avoid + * measuring out another long, but shorter match. + */ + if (best.length > 1000 && + there[best.length - 1] != best.there[best.length - 1]) { + continue; + } + + for (len = 0; + len < max_len && here[len] == there[len]; + len++) { + /* counting */ + } + if (len > 2) { + if (len > best.length) { + best.length = len; + best.there = there; + } + } + } + return best; +} + +struct write_context { + uint8_t *compressed; + uint32_t compressed_pos; + uint32_t max_compressed_size; + uint32_t indic; + uint32_t indic_bit; + uint32_t indic_pos; + uint32_t nibble_index; +}; + + +#define CHECK_INPUT_BYTES(__needed) \ + __CHECK_BYTES(uncompressed_size, uncompressed_pos, __needed) +#define CHECK_OUTPUT_BYTES(__needed) \ + __CHECK_BYTES(wc->max_compressed_size, wc->compressed_pos, __needed) + + +static inline ssize_t push_indicator_bit(struct write_context *wc, uint32_t bit) +{ + wc->indic = (wc->indic << 1) | bit; + wc->indic_bit += 1; + + if (wc->indic_bit == 32) { + PUSH_LE_U32(wc->compressed, wc->indic_pos, wc->indic); + wc->indic_bit = 0; + CHECK_OUTPUT_BYTES(sizeof(uint32_t)); + wc->indic_pos = wc->compressed_pos; + wc->compressed_pos += sizeof(uint32_t); + } + return wc->indic_pos; +} + + +static ssize_t encode_match(struct write_context *wc, + struct match match, + const uint8_t *here) +{ + uint32_t match_len = match.length - 3; + uint32_t best_offset = here - match.there - 1; + uint16_t metadata; + + if (best_offset > 8191) { + return -1; + } + + CHECK_OUTPUT_BYTES(sizeof(uint16_t)); + metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7)); + PUSH_LE_U16(wc->compressed, wc->compressed_pos, metadata); + wc->compressed_pos += sizeof(uint16_t); + + if (match_len >= 7) { + match_len -= 7; + + if (wc->nibble_index == 0) { + wc->nibble_index = wc->compressed_pos; + + 
CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + wc->compressed[wc->nibble_index] = MIN(match_len, 15); + wc->compressed_pos += sizeof(uint8_t); + } else { + wc->compressed[wc->nibble_index] |= MIN(match_len, 15) << 4; + wc->nibble_index = 0; + } + + if (match_len >= 15) { + match_len -= 15; + + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + wc->compressed[wc->compressed_pos] = MIN(match_len, 255); + wc->compressed_pos += sizeof(uint8_t); + + if (match_len >= 255) { + /* Additional match_len */ + + match_len += 7 + 15; + + if (match_len < (1 << 16)) { + CHECK_OUTPUT_BYTES(sizeof(uint16_t)); + PUSH_LE_U16(wc->compressed, wc->compressed_pos, + match_len); + wc->compressed_pos += sizeof(uint16_t); + } else { + CHECK_OUTPUT_BYTES(sizeof(uint16_t) + + sizeof(uint32_t)); + PUSH_LE_U16(wc->compressed, + wc->compressed_pos, 0); + wc->compressed_pos += sizeof(uint16_t); + + PUSH_LE_U32(wc->compressed, + wc->compressed_pos, + match_len); + wc->compressed_pos += sizeof(uint32_t); + } + } + } + } + return push_indicator_bit(wc, 1); +} + +#undef CHECK_OUTPUT_BYTES +#define CHECK_OUTPUT_BYTES(__needed) \ + __CHECK_BYTES(wc.max_compressed_size, wc.compressed_pos, __needed) + + +ssize_t lzxpress_compress(const uint8_t *uncompressed, + uint32_t uncompressed_size, + uint8_t *compressed, + uint32_t max_compressed_size) +{ + /* + * This is the algorithm in [MS-XCA] 2.3 "Plain LZ77 Compression". + * + * It avoids Huffman encoding by including literal bytes inline when a + * match is not found. Every so often it includes a uint32 bit map + * flagging which positions contain matches and which contain + * literals. The encoding of matches is of variable size, depending on + * the match length; they are always at least 16 bits long, and can + * implicitly use unused half-bytes from earlier in the stream. 
+ */ + ssize_t ret; + uint32_t uncompressed_pos; + struct write_context wc = { + .indic = 0, + .indic_pos = 0, + .indic_bit = 0, + .nibble_index = 0, + .compressed = compressed, + .compressed_pos = 0, + .max_compressed_size = max_compressed_size + }; + uint32_t hash_table[1 << LZX_PLAIN_COMP_HASH_BITS]; + memset(hash_table, 0xff, sizeof(hash_table)); + + if (!uncompressed_size) { + return 0; + } + + uncompressed_pos = 0; + CHECK_OUTPUT_BYTES(sizeof(uint32_t)); + PUSH_LE_U32(wc.compressed, wc.compressed_pos, 0); + wc.compressed_pos += sizeof(uint32_t); + + while ((uncompressed_pos < uncompressed_size) && + (wc.compressed_pos < wc.max_compressed_size)) { + + /* maximum len we can encode into metadata */ + const uint32_t max_len = MIN(0xFFFF + 3, + uncompressed_size - uncompressed_pos); + const uint8_t *here = uncompressed + uncompressed_pos; + uint16_t h; + struct match match = {0}; + + if (max_len >= 3) { + h = three_byte_hash(here); + match = lookup_match(hash_table, + h, + uncompressed, + uncompressed_pos, + max_len); + + store_match(hash_table, h, uncompressed_pos); + } else { + match.there = NULL; + match.length = 0; + } + + if (match.there == NULL) { + /* + * This is going to be a literal byte, which we flag + * by setting a bit in an indicator field somewhere + * earlier in the stream. 
+ */ + CHECK_INPUT_BYTES(sizeof(uint8_t)); + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + wc.compressed[wc.compressed_pos++] = *here; + uncompressed_pos++; + + ret = push_indicator_bit(&wc, 0); + if (ret < 0) { + return ret; + } + } else { + ret = encode_match(&wc, match, here); + if (ret < 0) { + return ret; + } + uncompressed_pos += match.length; + } + } + + if (wc.indic_bit != 0) { + wc.indic <<= 32 - wc.indic_bit; + } + wc.indic |= UINT32_MAX >> wc.indic_bit; + PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic); + + return wc.compressed_pos; +} + +ssize_t lzxpress_decompress(const uint8_t *input, + uint32_t input_size, + uint8_t *output, + uint32_t max_output_size) +{ + /* + * This is the algorithm in [MS-XCA] 2.4 "Plain LZ77 Decompression + * Algorithm Details". + */ + uint32_t output_index, input_index; + uint32_t indicator, indicator_bit; + uint32_t nibble_index; + + if (input_size == 0) { + return 0; + } + + output_index = 0; + input_index = 0; + indicator = 0; + indicator_bit = 0; + nibble_index = 0; + +#undef CHECK_INPUT_BYTES +#define CHECK_INPUT_BYTES(__needed) \ + __CHECK_BYTES(input_size, input_index, __needed) +#undef CHECK_OUTPUT_BYTES +#define CHECK_OUTPUT_BYTES(__needed) \ + __CHECK_BYTES(max_output_size, output_index, __needed) + + do { + if (indicator_bit == 0) { + CHECK_INPUT_BYTES(sizeof(uint32_t)); + indicator = PULL_LE_U32(input, input_index); + input_index += sizeof(uint32_t); + if (input_index == input_size) { + /* + * The compressor left room for indicator + * flags for data that doesn't exist. + */ + break; + } + indicator_bit = 32; + } + indicator_bit--; + + /* + * check whether the bit specified by indicator_bit is set or not + * set in indicator. 
For example, if indicator_bit has value 4 + * check whether the 4th bit of the value in indicator is set + */ + if (((indicator >> indicator_bit) & 1) == 0) { + CHECK_INPUT_BYTES(sizeof(uint8_t)); + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + output[output_index] = input[input_index]; + input_index += sizeof(uint8_t); + output_index += sizeof(uint8_t); + } else { + uint32_t length; + uint32_t offset; + + CHECK_INPUT_BYTES(sizeof(uint16_t)); + length = PULL_LE_U16(input, input_index); + input_index += sizeof(uint16_t); + offset = (length >> 3) + 1; + length &= 7; + + if (length == 7) { + if (nibble_index == 0) { + CHECK_INPUT_BYTES(sizeof(uint8_t)); + nibble_index = input_index; + length = input[input_index] & 0xf; + input_index += sizeof(uint8_t); + } else { + length = input[nibble_index] >> 4; + nibble_index = 0; + } + + if (length == 15) { + CHECK_INPUT_BYTES(sizeof(uint8_t)); + length = input[input_index]; + input_index += sizeof(uint8_t); + if (length == 255) { + CHECK_INPUT_BYTES(sizeof(uint16_t)); + length = PULL_LE_U16(input, input_index); + input_index += sizeof(uint16_t); + if (length == 0) { + CHECK_INPUT_BYTES(sizeof(uint32_t)); + length = PULL_LE_U32(input, input_index); + input_index += sizeof(uint32_t); + } + + if (length < (15 + 7)) { + return -1; + } + length -= (15 + 7); + } + length += 15; + } + length += 7; + } + length += 3; + + if (length == 0) { + return -1; + } + + for (; length > 0; --length) { + if (offset > output_index) { + return -1; + } + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + output[output_index] = output[output_index - offset]; + output_index += sizeof(uint8_t); + } + } + } while ((output_index < max_output_size) && (input_index < (input_size))); + + return output_index; +} diff --git a/lib/compression/lzxpress.h b/lib/compression/lzxpress.h new file mode 100644 index 0000000..df0ee59 --- /dev/null +++ b/lib/compression/lzxpress.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) Matthieu Suiche 2008 + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#ifndef _LZXPRESS_H +#define _LZXPRESS_H + +#define XPRESS_BLOCK_SIZE 0x10000 + +ssize_t lzxpress_compress(const uint8_t *uncompressed, + uint32_t uncompressed_size, + uint8_t *compressed, + uint32_t max_compressed_size); + +ssize_t lzxpress_decompress(const uint8_t *input, + uint32_t input_size, + uint8_t *output, + uint32_t max_output_size); + +#endif /* _LZXPRESS_H */ diff --git a/lib/compression/lzxpress_huffman.c b/lib/compression/lzxpress_huffman.c new file mode 100644 index 0000000..e14419c --- /dev/null +++ b/lib/compression/lzxpress_huffman.c @@ -0,0 +1,2045 @@ +/* + * Samba compression library - LGPLv3 + * + * Copyright © Catalyst IT 2022 + * + * Written by Douglas Bagnall + * and Joseph Sutton + * + * ** NOTE! The following LGPL license applies to this file. + * ** It does NOT imply that all of Samba is released under the LGPL + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include + +#include "replace.h" +#include "lzxpress_huffman.h" +#include "lib/util/stable_sort.h" +#include "lib/util/debug.h" +#include "lib/util/byteorder.h" +#include "lib/util/bytearray.h" + +/* + * DEBUG_NO_LZ77_MATCHES toggles the encoding of matches as matches. If it is + * false the potential match is written as a series of literals, which is a + * valid but usually inefficient encoding. 
This is useful for isolating a + * problem to either the LZ77 or the Huffman stage. + */ +#ifndef DEBUG_NO_LZ77_MATCHES +#define DEBUG_NO_LZ77_MATCHES false +#endif + +/* + * DEBUG_HUFFMAN_TREE forces the drawing of ascii art huffman trees during + * compression and decompression. + * + * These trees will also be drawn at DEBUG level 10, but that doesn't work + * with cmocka tests. + */ +#ifndef DEBUG_HUFFMAN_TREE +#define DEBUG_HUFFMAN_TREE false +#endif + +#if DEBUG_HUFFMAN_TREE +#define DBG(...) fprintf(stderr, __VA_ARGS__) +#else +#define DBG(...) DBG_INFO(__VA_ARGS__) +#endif + + +#define LZXPRESS_ERROR -1LL + +/* + * We won't encode a match length longer than MAX_MATCH_LENGTH. + * + * Reports are that Windows has a limit at 64M. + */ +#define MAX_MATCH_LENGTH (64 * 1024 * 1024) + + +struct bitstream { + const uint8_t *bytes; + size_t byte_pos; + size_t byte_size; + uint32_t bits; + int remaining_bits; + uint16_t *table; +}; + + +#if ! defined __has_builtin +#define __has_builtin(x) 0 +#endif + +/* + * bitlen_nonzero_16() returns the bit number of the most significant bit, or + * put another way, the integer log base 2. Log(0) is undefined; the argument + * has to be non-zero! + * 1 -> 0 + * 2,3 -> 1 + * 4-7 -> 2 + * 1024 -> 10, etc + * + * Probably this is handled by a compiler intrinsic function that maps to a + * dedicated machine instruction. 
+ */ + +static inline int bitlen_nonzero_16(uint16_t x) +{ +#if __has_builtin(__builtin_clz) + + /* __builtin_clz returns the number of leading zeros */ + return (sizeof(unsigned int) * CHAR_BIT) - 1 + - __builtin_clz((unsigned int) x); + +#else + + int count = -1; + while(x) { + x >>= 1; + count++; + } + return count; + +#endif +} + + +struct lzxhuff_compressor_context { + const uint8_t *input_bytes; + size_t input_size; + size_t input_pos; + size_t prev_block_pos; + uint8_t *output; + size_t available_size; + size_t output_pos; +}; + +static int compare_huffman_node_count(struct huffman_node *a, + struct huffman_node *b) +{ + return a->count - b->count; +} + +static int compare_huffman_node_depth(struct huffman_node *a, + struct huffman_node *b) +{ + int c = a->depth - b->depth; + if (c != 0) { + return c; + } + return (int)a->symbol - (int)b->symbol; +} + + +#define HASH_MASK ((1 << LZX_HUFF_COMP_HASH_BITS) - 1) + +static inline uint16_t three_byte_hash(const uint8_t *bytes) +{ + /* + * MS-XCA says "three byte hash", but does not specify it. + * + * This one is just cobbled together, but has quite good distribution + * in the 12-14 bit forms, which is what we care about most. + * e.g: 13 bit: median 2048, min 2022, max 2074, stddev 6.0 + */ + uint16_t a = bytes[0]; + uint16_t b = bytes[1] ^ 0x2e; + uint16_t c = bytes[2] ^ 0x55; + uint16_t ca = c - a; + uint16_t d = ((a + b) << 8) ^ (ca << 5) ^ (c + b) ^ (0xcab + a); + return d & HASH_MASK; +} + + +static inline uint16_t encode_match(size_t len, size_t offset) +{ + uint16_t code = 256; + code |= MIN(len - 3, 15); + code |= bitlen_nonzero_16(offset) << 4; + return code; +} + +/* + * debug_huffman_tree() uses debug_huffman_tree_print() to draw the Huffman + * tree in ascii art. + * + * Note that the Huffman tree is probably not the same as that implied by the + * canonical Huffman encoding that is finally used. 
That tree would be the + * same shape, but with the left and right toggled to sort the branches by + * length, after which the symbols for each length sorted by value. + */ + +static void debug_huffman_tree_print(struct huffman_node *node, + int *trail, int depth) +{ + if (node->left == NULL) { + /* time to print a row */ + int j; + bool branched = false; + int row[17]; + char c[100]; + int s = node->symbol; + char code[17]; + if (depth > 15) { + fprintf(stderr, + " \033[1;31m Max depth exceeded! (%d)\033[0m " + " symbol %#3x claimed depth %d count %d\n", + depth, node->symbol, node->depth, node->count); + return; + } + for (j = depth - 1; j >= 0; j--) { + if (branched) { + if (trail[j] == -1) { + row[j] = -3; + } else { + row[j] = -2; + } + } else if (trail[j] == -1) { + row[j] = -1; + branched = true; + } else { + row[j] = trail[j]; + } + } + for (j = 0; j < depth; j++) { + switch (row[j]) { + case -3: + code[j] = '1'; + fprintf(stderr, " "); + break; + case -2: + code[j] = '0'; + fprintf(stderr, " │ "); + break; + case -1: + code[j] = '1'; + fprintf(stderr, " ╰─"); + break; + default: + code[j] = '0'; + fprintf(stderr, "%5d─┬─", row[j]); + break; + } + } + code[depth] = 0; + if (s < 32) { + snprintf(c, sizeof(c), + "\033[1;32m%02x\033[0m \033[1;33m%c%c%c\033[0m", + s, + 0xE2, 0x90, 0x80 + s); /* utf-8 for symbol */ + } else if (s < 127) { + snprintf(c, sizeof(c), + "\033[1;32m%2x\033[0m '\033[10;32m%c\033[0m'", + s, s); + } else if (s < 256) { + snprintf(c, sizeof(c), "\033[1;32m%2x\033[0m", s); + } else { + uint16_t len = (s & 15) + 3; + uint16_t dbits = ((s >> 4) & 15) + 1; + snprintf(c, sizeof(c), + " \033[0;33mlen:%2d%s, " + "dist:%d-%d \033[0m \033[1;32m%3x\033[0m%s", + len, + len == 18 ? "+" : "", + 1 << (dbits - 1), + (1 << dbits) - 1, + s, + s == 256 ? 
" \033[1;31mEOF\033[0m" : ""); + + } + + fprintf(stderr, "──%5d %s \033[2;37m%s\033[0m\n", + node->count, c, code); + return; + } + trail[depth] = node->count; + debug_huffman_tree_print(node->left, trail, depth + 1); + trail[depth] = -1; + debug_huffman_tree_print(node->right, trail, depth + 1); +} + + +/* + * If DEBUG_HUFFMAN_TREE is defined true, debug_huffman_tree() + * will print a tree looking something like this: + * + * 7─┬─── 3 len:18+, dist:1-1 10f 0 + * ╰─ 4─┬─ 2─┬─── 1 61 'a' 100 + * │ ╰─── 1 62 'b' 101 + * ╰─ 2─┬─── 1 63 'c' 110 + * ╰─── 1 len: 3, dist:1-1 100 EOF 111 + * + * This is based off a Huffman root node, and the tree may not be the same as + * the canonical tree. + */ +static void debug_huffman_tree(struct huffman_node *root) +{ + int trail[17]; + debug_huffman_tree_print(root, trail, 0); +} + + +/* + * If DEBUG_HUFFMAN_TREE is defined true, debug_huffman_tree_from_table() + * will print something like this based on a decoding symbol table. + * + * Tree from decoding table 9 nodes → 5 codes + * 10000─┬─── 5000 len:18+, dist:1-1 10f 0 + * ╰─ 5000─┬─ 2500─┬─── 1250 61 'a' 100 + * │ ╰─── 1250 62 'b' 101 + * ╰─ 2500─┬─── 1250 63 'c' 110 + * ╰─── 1250 len: 3, dist:1-1 100 EOF 111 + * + * This is the canonical form of the Huffman tree where the actual counts + * aren't known (we use "10000" to help indicate relative frequencies). 
+ */ +static void debug_huffman_tree_from_table(uint16_t *table) +{ + int trail[17]; + struct huffman_node nodes[1024] = {{0}}; + uint16_t codes[1024]; + size_t n = 1; + size_t i = 0; + codes[0] = 0; + nodes[0].count = 10000; + + while (i < n) { + uint16_t index = codes[i]; + struct huffman_node *node = &nodes[i]; + if (table[index] == 0xffff) { + /* internal node */ + index <<= 1; + /* left */ + index++; + codes[n] = index; + node->left = nodes + n; + nodes[n].count = node->count >> 1; + n++; + /*right*/ + index++; + codes[n] = index; + node->right = nodes + n; + nodes[n].count = node->count >> 1; + n++; + } else { + /* leaf node */ + node->symbol = table[index] & 511; + } + i++; + } + + fprintf(stderr, + "\033[1;34m Tree from decoding table\033[0m " + "%zu nodes → %zu codes\n", + n, (n + 1) / 2); + debug_huffman_tree_print(nodes, trail, 0); +} + + +static bool depth_walk(struct huffman_node *n, uint32_t depth) +{ + bool ok; + if (n->left == NULL) { + /* this is a leaf, record the depth */ + n->depth = depth; + return true; + } + if (depth > 14) { + return false; + } + ok = (depth_walk(n->left, depth + 1) && + depth_walk(n->right, depth + 1)); + + return ok; +} + + +static bool check_and_record_depths(struct huffman_node *root) +{ + return depth_walk(root, 0); +} + + +static bool encode_values(struct huffman_node *leaves, + size_t n_leaves, + uint16_t symbol_values[512]) +{ + size_t i; + /* + * See, we have a leading 1 in our internal code representation, which + * indicates the code length. + */ + uint32_t code = 1; + uint32_t code_len = 0; + memset(symbol_values, 0, sizeof(uint16_t) * 512); + for (i = 0; i < n_leaves; i++) { + code <<= leaves[i].depth - code_len; + code_len = leaves[i].depth; + + symbol_values[leaves[i].symbol] = code; + code++; + } + /* + * The last code should be 11111... with code_len + 1 ones. The final + * code++ will wrap this round to 1000... with code_len + 1 zeroes. 
+ */ + + if (code != 2 << code_len) { + return false; + } + return true; +} + + +static int generate_huffman_codes(struct huffman_node *leaf_nodes, + struct huffman_node *internal_nodes, + uint16_t symbol_values[512]) +{ + size_t head_leaf = 0; + size_t head_branch = 0; + size_t tail_branch = 0; + struct huffman_node *huffman_root = NULL; + size_t i, j; + size_t n_leaves = 0; + + /* + * Before we sort the nodes, we can eliminate the unused ones. + */ + for (i = 0; i < 512; i++) { + if (leaf_nodes[i].count) { + leaf_nodes[n_leaves] = leaf_nodes[i]; + n_leaves++; + } + } + if (n_leaves == 0) { + return LZXPRESS_ERROR; + } + if (n_leaves == 1) { + /* + * There is *almost* no way this should happen, and it would + * ruin the tree (because the shortest possible codes are 1 + * bit long, and there are two of them). + * + * The only way to get here is in an internal block in a + * 3-or-more block message (i.e. > 128k), which consists + * entirely of a match starting in the previous block (if it + * was the end block, it would have the EOF symbol). + * + * What we do is add a dummy symbol which is this one XOR 256. + * It won't be used in the stream but will balance the tree. + */ + leaf_nodes[1] = leaf_nodes[0]; + leaf_nodes[1].symbol ^= 0x100; + n_leaves = 2; + } + + /* note, in sort we're using internal_nodes as auxiliary space */ + stable_sort(leaf_nodes, + internal_nodes, + n_leaves, + sizeof(struct huffman_node), + (samba_compare_fn_t)compare_huffman_node_count); + + /* + * This outer loop is for re-quantizing the counts if the tree is too + * tall (>15), which we need to do because the final encoding can't + * express a tree that deep. + * + * In theory, this should be a 'while (true)' loop, but we chicken + * out with 10 iterations, just in case. + * + * In practice it will almost always resolve in the first round; if + * not then, in the second or third. 
Remember we're looking at 64k or
+	 * less, so the rarest we can have is 1 in 64k; each round of
+	 * quantization effectively doubles its frequency to 1 in 32k, 1 in
+	 * 16k, etc, until we're treating the rare symbol as actually quite
+	 * common.
+	 */
+	for (j = 0; j < 10; j++) {
+		bool less_than_15_bits;
+		while (true) {
+			struct huffman_node *a = NULL;
+			struct huffman_node *b = NULL;
+			size_t leaf_len = n_leaves - head_leaf;
+			size_t internal_len = tail_branch - head_branch;
+
+			if (leaf_len + internal_len == 1) {
+				/*
+				 * We have the complete tree. The root will be
+				 * an internal node unless there is just one
+				 * symbol, which is already impossible.
+				 */
+				if (unlikely(leaf_len == 1)) {
+					return LZXPRESS_ERROR;
+				} else {
+					huffman_root = \
+						&internal_nodes[head_branch];
+				}
+				break;
+			}
+			/*
+			 * We know here we have at least two nodes, and we
+			 * want to select the two lowest scoring ones. Those
+			 * have to be either a) the head of each queue, or b)
+			 * the first two nodes of either queue.
+			 *
+			 * The complicating factors are: a) we need to check
+			 * the length of each queue, and b) in the case of
+			 * ties, we prefer to pair leaves with leaves.
+			 *
+			 * Note a complication we don't have: the leaf node
+			 * queue never grows, and the subtree queue starts
+			 * empty and cannot grow beyond n - 1. It feeds on
+			 * itself. We don't need to think about overflow.
+			 */
+			if (leaf_len == 0) {
+				/* two from subtrees */
+				a = &internal_nodes[head_branch];
+				b = &internal_nodes[head_branch + 1];
+				head_branch += 2;
+			} else if (internal_len == 0) {
+				/* two from nodes */
+				a = &leaf_nodes[head_leaf];
+				b = &leaf_nodes[head_leaf + 1];
+				head_leaf += 2;
+			} else if (leaf_len == 1 && internal_len == 1) {
+				/* one of each */
+				a = &leaf_nodes[head_leaf];
+				b = &internal_nodes[head_branch];
+				head_branch++;
+				head_leaf++;
+			} else {
+				/*
+				 * Take the lowest head, twice, checking for
+				 * length after taking the first one.
+ */ + if (leaf_nodes[head_leaf].count > + internal_nodes[head_branch].count) { + a = &internal_nodes[head_branch]; + head_branch++; + if (internal_len == 1) { + b = &leaf_nodes[head_leaf]; + head_leaf++; + goto done; + } + } else { + a = &leaf_nodes[head_leaf]; + head_leaf++; + if (leaf_len == 1) { + b = &internal_nodes[head_branch]; + head_branch++; + goto done; + } + } + /* the other node */ + if (leaf_nodes[head_leaf].count > + internal_nodes[head_branch].count) { + b = &internal_nodes[head_branch]; + head_branch++; + } else { + b = &leaf_nodes[head_leaf]; + head_leaf++; + } + } + done: + /* + * Now we add a new node to the subtrees list that + * combines the score of node_a and node_b, and points + * to them as children. + */ + internal_nodes[tail_branch].count = a->count + b->count; + internal_nodes[tail_branch].left = a; + internal_nodes[tail_branch].right = b; + tail_branch++; + if (tail_branch == n_leaves) { + /* + * We're not getting here, no way, never ever. + * Unless we made a terrible mistake. + * + * That is, in a binary tree with n leaves, + * there are ALWAYS n-1 internal nodes. + */ + return LZXPRESS_ERROR; + } + } + if (CHECK_DEBUGLVL(10) || DEBUG_HUFFMAN_TREE) { + debug_huffman_tree(huffman_root); + } + /* + * We have a tree, and need to turn it into a lookup table, + * and see if it is shallow enough (<= 15). + */ + less_than_15_bits = check_and_record_depths(huffman_root); + if (less_than_15_bits) { + /* + * Now the leaf nodes know how deep they are, and we + * no longer need the internal nodes. + * + * We need to sort the nodes of equal depth, so that + * they are sorted by depth first, and symbol value + * second. The internal_nodes can again be auxiliary + * memory. 
+ */ + stable_sort( + leaf_nodes, + internal_nodes, + n_leaves, + sizeof(struct huffman_node), + (samba_compare_fn_t)compare_huffman_node_depth); + + encode_values(leaf_nodes, n_leaves, symbol_values); + + return n_leaves; + } + + /* + * requantize by halving and rounding up, so that small counts + * become relatively bigger. This will lead to a flatter tree. + */ + for (i = 0; i < n_leaves; i++) { + leaf_nodes[i].count >>= 1; + leaf_nodes[i].count += 1; + } + head_leaf = 0; + head_branch = 0; + tail_branch = 0; + } + return LZXPRESS_ERROR; +} + +/* + * LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS is how far ahead to search in the + * circular hash table for a match, before we give up. A bigger number will + * generally lead to better but slower compression, but a stupidly big number + * will just be worse. + * + * If you're fiddling with this, consider also fiddling with + * LZX_HUFF_COMP_HASH_BITS. + */ +#define LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS 5 + +static inline void store_match(uint16_t *hash_table, + uint16_t h, + uint16_t offset) +{ + int i; + uint16_t o = hash_table[h]; + uint16_t h2; + uint16_t worst_h; + int worst_score; + + if (o == 0xffff) { + /* there is nothing there yet */ + hash_table[h] = offset; + return; + } + for (i = 1; i < LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS; i++) { + h2 = (h + i) & HASH_MASK; + if (hash_table[h2] == 0xffff) { + hash_table[h2] = offset; + return; + } + } + /* + * There are no slots, but we really want to store this, so we'll kick + * out the one with the longest distance. + */ + worst_h = h; + worst_score = offset - o; + for (i = 1; i < LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS; i++) { + int score; + h2 = (h + i) & HASH_MASK; + o = hash_table[h2]; + score = offset - o; + if (score > worst_score) { + worst_score = score; + worst_h = h2; + } + } + hash_table[worst_h] = offset; +} + + +/* + * Yes, struct match looks a lot like a DATA_BLOB. 
 */
struct match {
	const uint8_t *there;	/* earlier occurrence of the data; NULL means no match */
	size_t length;		/* number of matching bytes (0 when there is no match) */
};


/*
 * lookup_match()
 *
 * Probe up to LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS consecutive slots of the
 * hash table for an earlier occurrence of the 3-or-more byte string at
 * `here`, returning the best (longest, then closest) match found. A match
 * of fewer than 3 bytes is never returned (best stays zeroed).
 */
static inline struct match lookup_match(uint16_t *hash_table,
					uint16_t h,
					const uint8_t *data,
					const uint8_t *here,
					size_t max_len)
{
	int i;
	uint16_t o = hash_table[h];
	uint16_t h2;
	size_t len;
	const uint8_t *there = NULL;
	struct match best = {0};

	for (i = 0; i < LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS; i++) {
		h2 = (h + i) & HASH_MASK;
		o = hash_table[h2];
		if (o == 0xffff) {
			/*
			 * in setting this, we would never have stepped over
			 * an 0xffff, so we won't now.
			 */
			break;
		}
		there = data + o;
		/* matches can reach back at most 65534 bytes */
		if (here - there > 65534 || there > here) {
			continue;
		}

		/*
		 * When we already have a long match, we can try to avoid
		 * measuring out another long, but shorter match.
		 */
		if (best.length > 1000 &&
		    there[best.length - 1] != best.there[best.length - 1]) {
			continue;
		}

		for (len = 0;
		     len < max_len && here[len] == there[len];
		     len++) {
			/* counting */
		}
		if (len > 2) {
			/*
			 * As a tiebreaker, we prefer the closer match which
			 * is likely to encode smaller (and certainly no worse).
			 */
			if (len > best.length ||
			    (len == best.length && there > best.there)) {
				best.length = len;
				best.there = there;
			}
		}
	}
	return best;
}



/*
 * lz77_encode_block()
 *
 * LZ77-encode up to 64k of input into the intermediate representation
 * (a stream of uint16_t literals and 0xffff/0xfffe-tagged match records),
 * counting symbol frequencies into leaf_nodes for the later Huffman stage.
 *
 * Returns the number of uint16_t values written to cmp_mem->intermediate,
 * or a negative value on error.
 */
static ssize_t lz77_encode_block(struct lzxhuff_compressor_context *cmp_ctx,
				 struct lzxhuff_compressor_mem *cmp_mem,
				 uint16_t *hash_table,
				 uint16_t *prev_hash_table)
{
	uint16_t *intermediate = cmp_mem->intermediate;
	struct huffman_node *leaf_nodes = cmp_mem->leaf_nodes;
	uint16_t *symbol_values = cmp_mem->symbol_values;
	size_t i, j, intermediate_len;
	const uint8_t *data = cmp_ctx->input_bytes + cmp_ctx->input_pos;
	const uint8_t *prev_block = NULL;
	size_t remaining_size = cmp_ctx->input_size - cmp_ctx->input_pos;
	size_t block_end = MIN(65536, remaining_size);
	struct match match;
	int n_symbols;

	if (cmp_ctx->input_size < cmp_ctx->input_pos) {
		return LZXPRESS_ERROR;
	}

	if (cmp_ctx->prev_block_pos != cmp_ctx->input_pos) {
		prev_block = cmp_ctx->input_bytes + cmp_ctx->prev_block_pos;
	} else if (prev_hash_table != NULL) {
		/* we've got confused! hash and block should go together */
		return LZXPRESS_ERROR;
	}

	/*
	 * leaf_nodes is used to count the symbols seen, for later Huffman
	 * encoding.
	 */
	for (i = 0; i < 512; i++) {
		leaf_nodes[i] = (struct huffman_node) {
			.symbol = i
		};
	}

	j = 0;

	if (remaining_size < 41 || DEBUG_NO_LZ77_MATCHES) {
		/*
		 * There is no point doing a hash table and looking for
		 * matches in this tiny block (remembering we are committed to
		 * using 32 bits, so there's a good chance we wouldn't even
		 * save a byte). The threshold of 41 matches Windows.
		 * If remaining_size < 3, we *can't* do the hash.
		 */
		i = 0;
	} else {
		/*
		 * We use 0xffff as the unset value for table, because it is
		 * not a valid match offset (and 0x0 is).
		 */
		memset(hash_table, 0xff, sizeof(cmp_mem->hash_table1));

		for (i = 0; i <= block_end - 3; i++) {
			uint16_t code;
			const uint8_t *here = data + i;
			uint16_t h = three_byte_hash(here);
			size_t max_len = MIN(remaining_size - i, MAX_MATCH_LENGTH);
			match = lookup_match(hash_table,
					     h,
					     data,
					     here,
					     max_len);

			if (match.there == NULL && prev_hash_table != NULL) {
				/*
				 * If this is not the first block,
				 * backreferences can look into the previous
				 * block (but only as far as 65535 bytes, so
				 * the end of this block cannot see the start
				 * of the last one).
				 */
				match = lookup_match(prev_hash_table,
						     h,
						     prev_block,
						     here,
						     remaining_size - i);
			}

			store_match(hash_table, h, i);

			if (match.there == NULL) {
				/* add a literal and move on. */
				uint8_t c = data[i];
				leaf_nodes[c].count++;
				intermediate[j] = c;
				j++;
				continue;
			}

			/* a real match */
			if (match.length <= 65538) {
				/* short form: tag, length - 3, distance */
				intermediate[j] = 0xffff;
				intermediate[j + 1] = match.length - 3;
				intermediate[j + 2] = here - match.there;
				j += 3;
			} else {
				/* long form: tag, 32-bit length - 3, distance */
				size_t m = match.length - 3;
				intermediate[j] = 0xfffe;
				intermediate[j + 1] = m & 0xffff;
				intermediate[j + 2] = m >> 16;
				intermediate[j + 3] = here - match.there;
				j += 4;
			}
			code = encode_match(match.length, here - match.there);
			leaf_nodes[code].count++;
			i += match.length - 1; /* `- 1` for the loop i++ */
			/*
			 * A match can take us past the intended block length,
			 * extending the block. We don't need to do anything
			 * special for this case -- the loops will naturally
			 * do the right thing.
			 */
		}
	}

	/*
	 * There might be some bytes at the end.
	 */
	for (; i < block_end; i++) {
		leaf_nodes[data[i]].count++;
		intermediate[j] = data[i];
		j++;
	}

	if (i == remaining_size) {
		/* add a trailing EOF marker (256) */
		intermediate[j] = 0xffff;
		intermediate[j + 1] = 0;
		intermediate[j + 2] = 1;
		j += 3;
		leaf_nodes[256].count++;
	}

	intermediate_len = j;

	cmp_ctx->prev_block_pos = cmp_ctx->input_pos;
	cmp_ctx->input_pos += i;

	/* fill in the symbols table */
	n_symbols = generate_huffman_codes(leaf_nodes,
					   cmp_mem->internal_nodes,
					   symbol_values);
	if (n_symbols < 0) {
		return n_symbols;
	}

	return intermediate_len;
}



/*
 * write_huffman_table()
 *
 * Write the 256-byte Huffman code-length table: one nibble per symbol
 * (even-numbered symbols in the low nibble, odd in the high nibble),
 * each nibble being the bit-length of that symbol's code, or 0 if the
 * symbol does not occur.
 *
 * Returns 256 on success, or a negative value if the buffer is too small.
 */
static ssize_t write_huffman_table(uint16_t symbol_values[512],
				   uint8_t *output,
				   size_t available_size)
{
	size_t i;

	if (available_size < 256) {
		return LZXPRESS_ERROR;
	}

	for (i = 0; i < 256; i++) {
		uint8_t b = 0;
		uint16_t even = symbol_values[i * 2];
		uint16_t odd = symbol_values[i * 2 + 1];
		if (even != 0) {
			b = bitlen_nonzero_16(even);
		}
		if (odd != 0) {
			b |= bitlen_nonzero_16(odd) << 4;
		}
		output[i] = b;
	}
	return i;
}


/* book-keeping for the interleaved bitstream writer */
struct write_context {
	uint8_t *dest;
	size_t dest_len;
	size_t head;              /* where lengths go */
	size_t next_code;         /* where symbol stream goes */
	size_t pending_next_code; /* will be next_code */
	unsigned bit_len;
	uint32_t bits;
};

/*
 * Write out 16 bits, little-endian, for write_huffman_codes()
 *
 * As you'll notice, there's a bit to do.
 *
 * We are collecting up bits in a uint32_t, then when there are 16 of them we
 * write out a word into the stream, using a trio of offsets (wc->next_code,
 * wc->pending_next_code, and wc->head) which dance around ensuring that the
 * bitstream and the interspersed lengths are in the right places relative to
 * each other.
+ */ + +static inline bool write_bits(struct write_context *wc, + uint16_t code, uint16_t length) +{ + wc->bits <<= length; + wc->bits |= code; + wc->bit_len += length; + if (wc->bit_len > 16) { + uint32_t w = wc->bits >> (wc->bit_len - 16); + wc->bit_len -= 16; + if (wc->next_code + 2 > wc->dest_len || + unlikely(wc->bit_len > 16)) { + return false; + } + wc->dest[wc->next_code] = w & 0xff; + wc->dest[wc->next_code + 1] = (w >> 8) & 0xff; + wc->next_code = wc->pending_next_code; + wc->pending_next_code = wc->head; + wc->head += 2; + } + return true; +} + + +static inline bool write_code(struct write_context *wc, uint16_t code) +{ + int code_bit_len = bitlen_nonzero_16(code); + if (unlikely(code == 0)) { + return false; + } + code &= (1 << code_bit_len) - 1; + return write_bits(wc, code, code_bit_len); +} + +static inline bool write_byte(struct write_context *wc, uint8_t byte) +{ + if (wc->head + 1 > wc->dest_len) { + return false; + } + wc->dest[wc->head] = byte; + wc->head++; + return true; +} + + +static inline bool write_long_len(struct write_context *wc, size_t len) +{ + if (len < 65535) { + if (wc->head + 3 > wc->dest_len) { + return false; + } + wc->dest[wc->head] = 255; + wc->dest[wc->head + 1] = len & 255; + wc->dest[wc->head + 2] = len >> 8; + wc->head += 3; + } else { + if (wc->head + 7 > wc->dest_len) { + return false; + } + wc->dest[wc->head] = 255; + wc->dest[wc->head + 1] = 0; + wc->dest[wc->head + 2] = 0; + wc->dest[wc->head + 3] = len & 255; + wc->dest[wc->head + 4] = (len >> 8) & 255; + wc->dest[wc->head + 5] = (len >> 16) & 255; + wc->dest[wc->head + 6] = (len >> 24) & 255; + wc->head += 7; + } + return true; +} + +static ssize_t write_compressed_bytes(uint16_t symbol_values[512], + uint16_t *intermediate, + size_t intermediate_len, + uint8_t *dest, + size_t dest_len) +{ + bool ok; + size_t i; + size_t end; + struct write_context wc = { + .head = 4, + .pending_next_code = 2, + .dest = dest, + .dest_len = dest_len + }; + for (i = 0; i < 
intermediate_len; i++) { + uint16_t c = intermediate[i]; + size_t len; + uint16_t distance; + uint16_t code_len = 0; + uint16_t code_dist = 0; + if (c < 256) { + ok = write_code(&wc, symbol_values[c]); + if (!ok) { + return LZXPRESS_ERROR; + } + continue; + } + + if (c == 0xfffe) { + if (i > intermediate_len - 4) { + return LZXPRESS_ERROR; + } + + len = intermediate[i + 1]; + len |= (uint32_t)intermediate[i + 2] << 16; + distance = intermediate[i + 3]; + i += 3; + } else if (c == 0xffff) { + if (i > intermediate_len - 3) { + return LZXPRESS_ERROR; + } + len = intermediate[i + 1]; + distance = intermediate[i + 2]; + i += 2; + } else { + return LZXPRESS_ERROR; + } + if (unlikely(distance == 0)) { + return LZXPRESS_ERROR; + } + /* len has already had 3 subtracted */ + if (len >= 15) { + /* + * We are going to need to write extra length + * bytes into the stream, but we don't do it + * now, we do it after the code has been + * written (and before the distance bits). + */ + code_len = 15; + } else { + code_len = len; + } + code_dist = bitlen_nonzero_16(distance); + c = 256 | (code_dist << 4) | code_len; + if (c > 511) { + return LZXPRESS_ERROR; + } + + ok = write_code(&wc, symbol_values[c]); + if (!ok) { + return LZXPRESS_ERROR; + } + + if (code_len == 15) { + if (len >= 270) { + ok = write_long_len(&wc, len); + } else { + ok = write_byte(&wc, len - 15); + } + if (! ok) { + return LZXPRESS_ERROR; + } + } + if (code_dist != 0) { + uint16_t dist_bits = distance - (1 << code_dist); + ok = write_bits(&wc, dist_bits, code_dist); + if (!ok) { + return LZXPRESS_ERROR; + } + } + } + /* + * There are some intricacies around flushing the bits and returning + * the length. + * + * If the returned length is not exactly right and there is another + * block, that block will read its huffman table from the wrong place, + * and have all the symbol codes out by a multiple of 4. 
+ */ + end = wc.head; + if (wc.bit_len == 0) { + end -= 2; + } + ok = write_bits(&wc, 0, 16 - wc.bit_len); + if (!ok) { + return LZXPRESS_ERROR; + } + for (i = 0; i < 2; i++) { + /* + * Flush out the bits with zeroes. It doesn't matter if we do + * a round too many, as we have buffer space, and have already + * determined the returned length (end). + */ + ok = write_bits(&wc, 0, 16); + if (!ok) { + return LZXPRESS_ERROR; + } + } + return end; +} + + +static ssize_t lzx_huffman_compress_block(struct lzxhuff_compressor_context *cmp_ctx, + struct lzxhuff_compressor_mem *cmp_mem, + size_t block_no) +{ + ssize_t intermediate_size; + uint16_t *hash_table = NULL; + uint16_t *back_window_hash_table = NULL; + ssize_t bytes_written; + + if (cmp_ctx->available_size - cmp_ctx->output_pos < 260) { + /* huffman block + 4 bytes */ + return LZXPRESS_ERROR; + } + + /* + * For LZ77 compression, we keep a hash table for the previous block, + * via alternation after the first block. + * + * LZ77 writes into the intermediate buffer in the cmp_mem context. + */ + if (block_no == 0) { + hash_table = cmp_mem->hash_table1; + back_window_hash_table = NULL; + } else if (block_no & 1) { + hash_table = cmp_mem->hash_table2; + back_window_hash_table = cmp_mem->hash_table1; + } else { + hash_table = cmp_mem->hash_table1; + back_window_hash_table = cmp_mem->hash_table2; + } + + intermediate_size = lz77_encode_block(cmp_ctx, + cmp_mem, + hash_table, + back_window_hash_table); + + if (intermediate_size < 0) { + return intermediate_size; + } + + /* + * Write the 256 byte Huffman table, based on the counts gained in + * LZ77 phase. + */ + bytes_written = write_huffman_table( + cmp_mem->symbol_values, + cmp_ctx->output + cmp_ctx->output_pos, + cmp_ctx->available_size - cmp_ctx->output_pos); + + if (bytes_written != 256) { + return LZXPRESS_ERROR; + } + cmp_ctx->output_pos += 256; + + /* + * Write the compressed bytes using the LZ77 matches and Huffman codes + * worked out in the previous steps. 
+ */ + bytes_written = write_compressed_bytes( + cmp_mem->symbol_values, + cmp_mem->intermediate, + intermediate_size, + cmp_ctx->output + cmp_ctx->output_pos, + cmp_ctx->available_size - cmp_ctx->output_pos); + + if (bytes_written < 0) { + return bytes_written; + } + + cmp_ctx->output_pos += bytes_written; + return bytes_written; +} + +/* + * lzxpress_huffman_max_compressed_size() + * + * Return the most bytes the compression can take, to allow + * pre-allocation. + */ +size_t lzxpress_huffman_max_compressed_size(size_t input_size) +{ + /* + * In the worst case, the output size should be about the same as the + * input size, plus the 256 byte header per 64k block. We aim for + * ample, but within the order of magnitude. + */ + return input_size + (input_size / 8) + 270; +} + +/* + * lzxpress_huffman_compress_talloc() + * + * This is the convenience function that allocates the compressor context and + * output memory for you. The return value is the number of bytes written to + * the location indicated by the output pointer. + * + * The maximum input_size is effectively around 227MB due to the need to guess + * an upper bound on the output size that hits an internal limitation in + * talloc. + * + * @param mem_ctx TALLOC_CTX parent for the compressed buffer. + * @param input_bytes memory to be compressed. + * @param input_size length of the input buffer. + * @param output destination pointer for the compressed data. + * + * @return the number of bytes written or -1 on error. 
+ */ + +ssize_t lzxpress_huffman_compress_talloc(TALLOC_CTX *mem_ctx, + const uint8_t *input_bytes, + size_t input_size, + uint8_t **output) +{ + struct lzxhuff_compressor_mem *cmp = NULL; + size_t alloc_size = lzxpress_huffman_max_compressed_size(input_size); + + ssize_t output_size; + + *output = talloc_array(mem_ctx, uint8_t, alloc_size); + if (*output == NULL) { + return LZXPRESS_ERROR; + } + + cmp = talloc(mem_ctx, struct lzxhuff_compressor_mem); + if (cmp == NULL) { + TALLOC_FREE(*output); + return LZXPRESS_ERROR; + } + + output_size = lzxpress_huffman_compress(cmp, + input_bytes, + input_size, + *output, + alloc_size); + + talloc_free(cmp); + + if (output_size < 0) { + TALLOC_FREE(*output); + return LZXPRESS_ERROR; + } + + *output = talloc_realloc(mem_ctx, *output, uint8_t, output_size); + if (*output == NULL) { + return LZXPRESS_ERROR; + } + + return output_size; +} + +/* + * lzxpress_huffman_compress() + * + * This is the inconvenience function, slightly faster and fiddlier than + * lzxpress_huffman_compress_talloc(). + * + * To use this, you need to have allocated (but not initialised) a `struct + * lzxhuff_compressor_mem`, and an output buffer. If the buffer is not big + * enough (per `output_size`), you'll get a negative return value, otherwise + * the number of bytes actually consumed, which will always be at least 260. + * + * The `struct lzxhuff_compressor_mem` is reusable -- it is basically a + * collection of uninitialised memory buffers. The total size is less than + * 150k, so stack allocation is plausible. + * + * input_size and available_size are limited to the minimum of UINT32_MAX and + * SSIZE_MAX. On 64 bit machines that will be UINT32_MAX, or 4GB. + * + * @param cmp_mem a struct lzxhuff_compressor_mem. + * @param input_bytes memory to be compressed. + * @param input_size length of the input buffer. + * @param output destination for the compressed data. + * @param available_size allocated output bytes. 
 *
 * @return the number of bytes written or -1 on error.
 */
ssize_t lzxpress_huffman_compress(struct lzxhuff_compressor_mem *cmp_mem,
				  const uint8_t *input_bytes,
				  size_t input_size,
				  uint8_t *output,
				  size_t available_size)
{
	size_t i = 0;
	struct lzxhuff_compressor_context cmp_ctx = {
		.input_bytes = input_bytes,
		.input_size = input_size,
		.input_pos = 0,
		.prev_block_pos = 0,
		.output = output,
		.available_size = available_size,
		.output_pos = 0
	};

	if (input_size == 0) {
		/*
		 * We can't deal with this for a number of reasons (e.g. it
		 * breaks the Huffman tree), and the output will be infinitely
		 * bigger than the input. The caller needs to go and think
		 * about what they're trying to do here.
		 */
		return LZXPRESS_ERROR;
	}

	if (input_size > SSIZE_MAX ||
	    input_size > UINT32_MAX ||
	    available_size > SSIZE_MAX ||
	    available_size > UINT32_MAX ||
	    available_size == 0) {
		/*
		 * We use negative ssize_t to return errors, which is limiting
		 * on 32 bit machines; otherwise we adhere to Microsoft's 4GB
		 * limit.
		 *
		 * lzxpress_huffman_compress_talloc() will not get this far,
		 * having already have failed on talloc's 256 MB limit.
		 */
		return LZXPRESS_ERROR;
	}

	if (cmp_mem == NULL ||
	    output == NULL ||
	    input_bytes == NULL) {
		return LZXPRESS_ERROR;
	}

	/* compress 64k blocks until the input is exhausted */
	while (cmp_ctx.input_pos < cmp_ctx.input_size) {
		ssize_t ret;
		ret = lzx_huffman_compress_block(&cmp_ctx,
						 cmp_mem,
						 i);
		if (ret < 0) {
			return ret;
		}
		i++;
	}

	return cmp_ctx.output_pos;
}

/*
 * Debug helper: breadth-first walk of the implicit-tree lookup table,
 * printing each symbol with its code bits, and a count of the 0xffff
 * (internal/unused) entries visited.
 */
static void debug_tree_codes(struct bitstream *input)
{
	/*
	 */
	size_t head = 0;
	size_t tail = 2;
	size_t ffff_count = 0;
	struct q {
		uint16_t tree_code;
		uint16_t code_code;
	};
	/* NOTE: ~256k of stack; this is debug-only code */
	struct q queue[65536];
	char bits[17];
	uint16_t *t = input->table;
	queue[0].tree_code = 1;
	queue[0].code_code = 2;
	queue[1].tree_code = 2;
	queue[1].code_code = 3;
	while (head < tail) {
		struct q q = queue[head];
		uint16_t x = t[q.tree_code];
		if (x != 0xffff) {
			int k;
			uint16_t j = q.code_code;
			size_t offset = bitlen_nonzero_16(j) - 1;
			if (unlikely(j == 0)) {
				DBG("BROKEN code is 0!\n");
				return;
			}

			for (k = 0; k <= offset; k++) {
				bool b = (j >> (offset - k)) & 1;
				bits[k] = b ? '1' : '0';
			}
			bits[k] = 0;
			DBG("%03x %s\n", x & 511, bits);
			head++;
			continue;
		}
		ffff_count++;
		queue[tail].tree_code = q.tree_code * 2 + 1;
		queue[tail].code_code = q.code_code * 2;
		tail++;
		queue[tail].tree_code = q.tree_code * 2 + 1 + 1;
		queue[tail].code_code = q.code_code * 2 + 1;
		tail++;
		head++;
	}
	DBG("0xffff count: %zu\n", ffff_count);
}

/**
 * Determines the sort order of one prefix_code_symbol relative to another
 */
static int compare_uint16(const uint16_t *a, const uint16_t *b)
{
	if (*a < *b) {
		return -1;
	}
	if (*a > *b) {
		return 1;
	}
	return 0;
}


/*
 * Read the 256-byte code-length table from the input and build the
 * canonical Huffman decoding table (an implicit binary tree) in
 * input->table. Returns false on a malformed table.
 */
static bool fill_decomp_table(struct bitstream *input)
{
	/*
	 * There are 512 symbols, each encoded in 4 bits, which indicates
	 * their depth in the Huffman tree. The even numbers get the lower
	 * nibble of each byte, so that the byte hex values look backwards
	 * (i.e. 0xab encodes b then a). These are allocated Huffman codes in
	 * order of appearance, per depth.
	 *
	 * For example, if the first two bytes were:
	 *
	 * 0x23 0x53
	 *
	 * the first four codes have the lengths 3, 2, 3, 5.
	 * Let's call them A, B, C, D.
	 *
	 * Suppose there is no other codeword with length 1 (which is
	 * necessarily true in this example) or 2, but there might be others
	 * of length 3 or 4. Then we can say this about the codes:
	 *
	 *        _ --*--_
	 *       /        \
	 *      0          1
	 *     / \        / \
	 *    0   1      0   1
	 *    B   |\    / \  |\
	 *       0  1  0   1 0 1
	 *       A  C  |\  /| | |\
	 *
	 *  pos bits  code
	 *  A    3    010
	 *  B    2    00
	 *  C    3    011
	 *  D    5    1????
	 *
	 * B has the shortest code, so takes the leftmost branch, 00. That
	 * ends the branch -- nothing else can start with 00. There are no
	 * more 2s, so we look at the 3s, starting as far left as possible. So
	 * A takes 010 and C takes 011. That means everything else has to
	 * start with 1xx. We don't know how many codewords of length 3 or 4
	 * there are; if there are none, D would end up with 10000, the
	 * leftmost available code of length 5. If the compressor is any good,
	 * there should be no unused leaf nodes left dangling at the end.
	 *
	 * (this is "Canonical Huffman Coding").
	 *
	 *
	 * But what symbols do these codes actually stand for?
	 * --------------------------------------------------
	 *
	 * Good question. The first 256 codes stand for the corresponding
	 * literal bytes. The codes from 256 to 511 stand for LZ77 matches,
	 * which have a distance and a length, encoded in a strange way that
	 * isn't entirely the purview of this function.
	 *
	 * What does the value 0 mean?
	 * ---------------------------
	 *
	 * The code does not occur. For example, if the next byte in the
	 * example above was 0x07, that would give the byte 0x04 a 7-long
	 * code, and no code to the 0x05 byte, which means there is no way
	 * we are going to see a 5 in the decoded stream.
	 *
	 * Isn't LZ77 + Huffman what zip/gzip/zlib do?
	 * -------------------------------------------
	 *
	 * Yes, DEFLATE is LZ77 + Huffman, but the details are quite different.
	 */
	uint16_t symbols[512];
	uint16_t sort_mem[512];
	size_t i, n_symbols;
	ssize_t code;
	uint16_t len = 0, prev_len;
	const uint8_t *table_bytes = input->bytes + input->byte_pos;

	if (input->byte_pos + 260 > input->byte_size) {
		return false;
	}

	n_symbols = 0;
	for (i = 0; i < 256; i++) {
		uint16_t even = table_bytes[i] & 15;
		uint16_t odd = table_bytes[i] >> 4;
		if (even != 0) {
			symbols[n_symbols] = (even << 9) + i * 2;
			n_symbols++;
		}
		if (odd != 0) {
			symbols[n_symbols] = (odd << 9) + i * 2 + 1;
			n_symbols++;
		}
	}
	input->byte_pos += 256;
	if (n_symbols == 0) {
		return false;
	}

	/* sort by depth first, then symbol value (both packed in one u16) */
	stable_sort(symbols, sort_mem, n_symbols, sizeof(uint16_t),
		    (samba_compare_fn_t)compare_uint16);

	/*
	 * we're using an implicit binary tree, as you'd see in a heap.
	 * table[0] = unused
	 * table[1] = '0'
	 * table[2] = '1'
	 * table[3] = '00'     <-- '00' and '01' are children of '0'
	 * table[4] = '01'     <-- '0' is [0], children are [0 * 2 + {1,2}]
	 * table[5] = '10'
	 * table[6] = '11'
	 * table[7] = '000'
	 * table[8] = '001'
	 * table[9] = '010'
	 * table[10]= '011'
	 * table[11]= '100'
	 *
	 * table[1 << n - 1] = '0' * n
	 * table[1 << n - 1 + x] = n-bit wide x (left padded with '0')
	 * table[1 << n - 2] = '1' * (n - 1)
	 *
	 * table[i]->left  = table[i*2 + 1]
	 * table[i]->right = table[i*2 + 2]
	 * table[0xffff] = unused (16 '0's, max len is 15)
	 *
	 * therefore e.g. table[70] = table[64 - 1 + 7]
	 *                          = table[1 << 6 - 1 + 7]
	 *                          = '000111' (binary 7, widened to 6 bits)
	 *
	 * and if '000111' is a code,
	 * '00011', '0001', '000', '00', '0' are unavailable prefixes.
	 *       34      16     7    3    1  are their indices
	 * and (i - 1) >> 1 is the path back from 70 through these.
	 *
	 * the lookup is
	 *
	 * 1 start with i = 0
	 * 2 extract a symbol bit (i = (i << 1) + bit + 1)
	 * 3 is table[i] == 0xffff?
	 * 4 yes -- goto 2
	 * 4 table[i] & 511 is the symbol, stop
	 *
	 * and the construction (here) is sort of the reverse.
	 *
	 * Most of this table is free space that can never be reached, and
	 * most of the activity is at the beginning (since all codes start
	 * there, and by design the shortest codes are the most common).
	 */
	for (i = 0; i < 32; i++) {
		/* prefill the table head */
		input->table[i] = 0xffff;
	}
	code = -1;
	prev_len = 0;
	for (i = 0; i < n_symbols; i++) {
		uint16_t s = symbols[i];
		uint16_t prefix;
		len = (s >> 9) & 15;
		s &= 511;
		code++;
		/* on a depth change, the canonical code shifts left */
		while (len != prev_len) {
			code <<= 1;
			code++;
			prev_len++;
		}

		if (code >= 65535) {
			return false;
		}
		input->table[code] = s;
		/* mark all ancestors of this code as internal nodes */
		for(prefix = (code - 1) >> 1;
		    prefix > 31;
		    prefix = (prefix - 1) >> 1) {
			input->table[prefix] = 0xffff;
		}
	}
	if (CHECK_DEBUGLVL(10)) {
		debug_tree_codes(input);
	}

	/*
	 * check that the last code encodes 11111..., with right number of
	 * ones, pointing to the right symbol -- otherwise we have a dangling
	 * uninitialised symbol.
	 */
	if (code != (1 << (len + 1)) - 2) {
		return false;
	}
	return true;
}


/* bounds-checked little-endian reads from the input bitstream struct */
#define CHECK_READ_32(dest)					  \
	do {							  \
		if (input->byte_pos + 4 > input->byte_size) {	  \
			return LZXPRESS_ERROR;			  \
		}						  \
		dest = PULL_LE_U32(input->bytes, input->byte_pos); \
		input->byte_pos += 4;				  \
	} while (0)

#define CHECK_READ_16(dest)					  \
	do {							  \
		if (input->byte_pos + 2 > input->byte_size) {	  \
			return LZXPRESS_ERROR;			  \
		}						  \
		dest = PULL_LE_U16(input->bytes, input->byte_pos); \
		input->byte_pos += 2;				  \
	} while (0)

#define CHECK_READ_8(dest)					  \
	do {							  \
		if (input->byte_pos >= input->byte_size) {	  \
			return LZXPRESS_ERROR;			  \
		}						  \
		dest = PULL_LE_U8(input->bytes, input->byte_pos); \
		input->byte_pos++;				  \
	} while(0)


/*
 * Refill the bit buffer: 16 bits if two or more bytes remain, 8 bits if
 * exactly one remains, error at end of input.
 */
static inline ssize_t pull_bits(struct bitstream *input)
{
	if (input->byte_pos + 1 < input->byte_size) {
		uint16_t tmp;
		CHECK_READ_16(tmp);
		input->remaining_bits += 16;
		input->bits <<= 16;
		input->bits |= tmp;
	} else if (input->byte_pos < input->byte_size) {
		uint8_t tmp;
		CHECK_READ_8(tmp);
		input->remaining_bits += 8;
		input->bits <<= 8;
		input->bits |= tmp;
	} else {
		return LZXPRESS_ERROR;
	}
	return 0;
}


/*
 * Decompress a block. The actual decompressed size is returned (or -1 on
 * error). The putative block length is 64k (or shorter, if the message ends
 * first), but a match can run over the end, extending the block. That's why
 * we need the overall output size as well as the block size. A match encoded
 * in this block can point back to previous blocks, but not before the
 * beginning of the message, so we also need the previously decoded size.
 *
 * The compressed block will have 256 bytes for the Huffman table, and at
 * least 4 bytes of (possibly padded) encoded values.
 */
static ssize_t lzx_huffman_decompress_block(struct bitstream *input,
					    uint8_t *output,
					    size_t block_size,
					    size_t output_size,
					    size_t previous_size)
{
	size_t output_pos = 0;
	uint16_t symbol;
	size_t index;
	uint16_t distance_bits_wanted = 0;
	size_t distance = 0;
	size_t length = 0;
	bool ok;
	uint32_t tmp;
	bool seen_eof_marker = false;

	ok = fill_decomp_table(input);
	if (! ok) {
		return LZXPRESS_ERROR;
	}
	if (CHECK_DEBUGLVL(10) || DEBUG_HUFFMAN_TREE) {
		debug_huffman_tree_from_table(input->table);
	}
	/*
	 * Always read 32 bits at the start, even if we don't need them.
	 */
	CHECK_READ_16(tmp);
	CHECK_READ_16(input->bits);
	input->bits |= tmp << 16;
	input->remaining_bits = 32;

	/*
	 * This loop iterates over individual *bits*. These are read from
	 * little-endian 16 bit words, most significant bit first.
	 *
	 * At points in the bitstream, the following are possible:
	 *
	 * # the source word is empty and needs to be refilled from the input
	 *   stream.
	 * # an incomplete codeword is being extended.
	 * # a codeword is resolved, either as a literal or a match.
	 * # a literal is written.
	 * # a match is collecting distance bits.
	 * # the output stream is copied, as specified by a match.
	 * # input bytes are read for match lengths.
	 *
	 * Note that we *don't* specifically check for the EOF marker (symbol
	 * 256) in this loop, because the precondition for stopping for the
	 * EOF marker is that the output buffer is full (otherwise, you
	 * wouldn't know which 256 is EOF, rather than an actual symbol), and
	 * we *always* want to stop when the buffer is full. So we work out if
	 * there is an EOF in another loop after we stop writing.
	 */

	index = 0;
	while (output_pos < block_size) {
		uint16_t b;
		if (input->remaining_bits == 16) {
			ssize_t ret = pull_bits(input);
			if (ret) {
				return ret;
			}
		}
		input->remaining_bits--;

		b = (input->bits >> input->remaining_bits) & 1;
		if (length == 0) {
			/* not in a match; pulling a codeword */
			index <<= 1;
			index += b + 1;
			if (input->table[index] == 0xffff) {
				/* incomplete codeword, the common case */
				continue;
			}
			/* found the symbol, reset the code string */
			symbol = input->table[index] & 511;
			index = 0;
			if (symbol < 256) {
				/* a literal, the easy case */
				output[output_pos] = symbol;
				output_pos++;
				continue;
			}

			/* the beginning of a match */
			distance_bits_wanted = (symbol >> 4) & 15;
			distance = 1 << distance_bits_wanted;
			length = symbol & 15;
			if (length == 15) {
				/* extended length: extra byte(s) follow */
				CHECK_READ_8(tmp);
				length += tmp;
				if (length == 255 + 15) {
					/*
					 * note, we discard (don't add) the
					 * length so far.
					 */
					CHECK_READ_16(length);
					if (length == 0) {
						CHECK_READ_32(length);
					}
				}
			}
			length += 3;
		} else {
			/* we are pulling extra distance bits */
			distance_bits_wanted--;
			distance |= b << distance_bits_wanted;
		}

		if (distance_bits_wanted == 0) {
			/*
			 * We have a complete match, and it is time to do the
			 * copy (byte by byte, because the ranges can overlap,
			 * and we might need to copy bytes we just copied in).
			 *
			 * It is possible that this match will extend beyond
			 * the end of the expected block. That's fine, so long
			 * as it doesn't extend past the total output size.
			 */
			size_t i;
			size_t end = output_pos + length;
			uint8_t *here = output + output_pos;
			uint8_t *there = here - distance;
			if (end > output_size ||
			    previous_size + output_pos < distance ||
			    unlikely(end < output_pos || there > here)) {
				return LZXPRESS_ERROR;
			}
			for (i = 0; i < length; i++) {
				here[i] = there[i];
			}
			output_pos += length;
			distance = 0;
			length = 0;
		}
	}

	if (length != 0 || index != 0) {
		/* it seems like we've hit an early end, mid-code */
		return LZXPRESS_ERROR;
	}

	if (input->byte_pos + 256 < input->byte_size) {
		/*
		 * This block is over, but it clearly isn't the last block, so
		 * we don't want to look for the EOF.
		 */
		return output_pos;
	}
	/*
	 * We won't write any more, but we try to read some more to make sure
	 * we're finishing in a good place. That means we want to see a 256
	 * symbol and then some number of zeroes, possibly zero, but as many
	 * as 32.
	 *
	 * In this we are perhaps a bit stricter than Windows, which
	 * apparently does not insist on the EOF marker, nor on a lack of
	 * trailing bytes.
	 */
	while (true) {
		uint16_t b;
		if (input->remaining_bits == 16) {
			ssize_t ret;
			if (input->byte_pos == input->byte_size) {
				/* FIN */
				break;
			}
			ret = pull_bits(input);
			if (ret) {
				return ret;
			}
		}
		input->remaining_bits--;
		b = (input->bits >> input->remaining_bits) & 1;
		if (seen_eof_marker) {
			/*
			 * we have read an EOF symbol. Now we just want to
			 * see zeroes.
			 */
			if (b != 0) {
				return LZXPRESS_ERROR;
			}
			continue;
		}

		/* we're pulling in a symbol, which had better be 256 */
		index <<= 1;
		index += b + 1;
		if (input->table[index] == 0xffff) {
			continue;
		}

		symbol = input->table[index] & 511;
		if (symbol != 256) {
			return LZXPRESS_ERROR;
		}
		seen_eof_marker = true;
		continue;
	}

	if (! seen_eof_marker) {
		return LZXPRESS_ERROR;
	}

	return output_pos;
}

/*
 * Decompress the whole message, block by block, into `output`. Returns
 * the total number of bytes written, or a negative value on error. The
 * input must be fully consumed and must decompress to exactly
 * output_size bytes (short or long results are errors).
 */
static ssize_t lzxpress_huffman_decompress_internal(struct bitstream *input,
						    uint8_t *output,
						    size_t output_size)
{
	size_t output_pos = 0;

	if (input->byte_size < 260) {
		return LZXPRESS_ERROR;
	}

	while (input->byte_pos < input->byte_size) {
		ssize_t block_output_pos;
		ssize_t block_output_size;
		size_t remaining_output_size = output_size - output_pos;

		block_output_size = MIN(65536, remaining_output_size);

		block_output_pos = lzx_huffman_decompress_block(
			input,
			output + output_pos,
			block_output_size,
			remaining_output_size,
			output_pos);

		/* each block must fill its expected size (or fail) */
		if (block_output_pos < block_output_size) {
			return LZXPRESS_ERROR;
		}
		output_pos += block_output_pos;
		if (output_pos > output_size) {
			/* not expecting to get here. */
			return LZXPRESS_ERROR;
		}
	}

	if (input->byte_pos != input->byte_size) {
		return LZXPRESS_ERROR;
	}

	return output_pos;
}


/*
 * lzxpress_huffman_decompress()
 *
 * output_size must be the expected length of the decompressed data.
 * input_size and output_size are limited to the minimum of UINT32_MAX and
 * SSIZE_MAX. On 64 bit machines that will be UINT32_MAX, or 4GB.
 *
 * @param input_bytes  memory to be decompressed.
 * @param input_size   length of the compressed buffer.
 * @param output       destination for the decompressed data.
 * @param output_size  exact expected length of the decompressed data.
 *
 * @return the number of bytes written or -1 on error.
 */

ssize_t lzxpress_huffman_decompress(const uint8_t *input_bytes,
				    size_t input_size,
				    uint8_t *output,
				    size_t output_size)
{
	/* NOTE: 128k of stack for the decode table */
	uint16_t table[65536];
	struct bitstream input = {
		.bytes = input_bytes,
		.byte_size = input_size,
		.byte_pos = 0,
		.bits = 0,
		.remaining_bits = 0,
		.table = table
	};

	if (input_size > SSIZE_MAX ||
	    input_size > UINT32_MAX ||
	    output_size > SSIZE_MAX ||
	    output_size > UINT32_MAX ||
	    input_size == 0 ||
	    output_size == 0 ||
	    input_bytes == NULL ||
	    output == NULL) {
		/*
		 * We use negative ssize_t to return errors, which is limiting
		 * on 32 bit machines, and the 4GB limit exists on Windows.
		 */
		return LZXPRESS_ERROR;
	}

	return lzxpress_huffman_decompress_internal(&input,
						    output,
						    output_size);
}


/**
 * lzxpress_huffman_decompress_talloc()
 *
 * The caller must provide the exact size of the expected output.
 *
 * The input_size is limited to the minimum of UINT32_MAX and SSIZE_MAX, but
 * output_size is limited to 256MB due to a limit in talloc. This effectively
 * limits input_size too, as non-crafted compressed data will not exceed the
 * decompressed size by very much.
 *
 * @param mem_ctx      TALLOC_CTX parent for the decompressed buffer.
 * @param input_bytes  memory to be decompressed.
 * @param input_size   length of the compressed buffer.
 * @param output_size  expected decompressed size.
 *
 * @return a talloc'ed buffer exactly output_size in length, or NULL.
+ */ + +uint8_t *lzxpress_huffman_decompress_talloc(TALLOC_CTX *mem_ctx, + const uint8_t *input_bytes, + size_t input_size, + size_t output_size) +{ + ssize_t result; + uint8_t *output = NULL; + struct bitstream input = { + .bytes = input_bytes, + .byte_size = input_size + }; + + output = talloc_array(mem_ctx, uint8_t, output_size); + if (output == NULL) { + return NULL; + } + + input.table = talloc_array(mem_ctx, uint16_t, 65536); + if (input.table == NULL) { + talloc_free(output); + return NULL; + } + result = lzxpress_huffman_decompress_internal(&input, + output, + output_size); + talloc_free(input.table); + + if (result != output_size) { + talloc_free(output); + return NULL; + } + return output; +} diff --git a/lib/compression/lzxpress_huffman.h b/lib/compression/lzxpress_huffman.h new file mode 100644 index 0000000..41cca5c --- /dev/null +++ b/lib/compression/lzxpress_huffman.h @@ -0,0 +1,95 @@ +/* + * Samba compression library - LGPLv3 + * + * Copyright © Catalyst IT 2022 + * + * ** NOTE! The following LGPL license applies to this file. + * ** It does NOT imply that all of Samba is released under the LGPL + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HAVE_LZXPRESS_HUFFMAN_H +#define HAVE_LZXPRESS_HUFFMAN_H + + +struct huffman_node { + struct huffman_node *left; + struct huffman_node *right; + uint32_t count; + uint16_t symbol; + int8_t depth; +}; + + +/* + * LZX_HUFF_COMP_HASH_BITS is how big to make the hash tables + * (12 means 4096, etc). + * + * A larger number (up to 16) will be faster on long messages (fewer + * collisions), but probably slower on short ones (more prep). + */ +#define LZX_HUFF_COMP_HASH_BITS 14 + + +/* + * This struct just coalesces all the memory you need for LZ77 + Huffman + * compression together in one bundle. + * + * There are a few different things you want, you usually want them all, so + * this makes it easy to allocate them all at once. + */ + +struct lzxhuff_compressor_mem { + struct huffman_node leaf_nodes[512]; + struct huffman_node internal_nodes[512]; + uint16_t symbol_values[512]; + uint16_t intermediate[65536 + 6]; + uint16_t hash_table1[1 << LZX_HUFF_COMP_HASH_BITS]; + uint16_t hash_table2[1 << LZX_HUFF_COMP_HASH_BITS]; +}; + + +ssize_t lzxpress_huffman_compress(struct lzxhuff_compressor_mem *cmp, + const uint8_t *input_bytes, + size_t input_size, + uint8_t *output, + size_t available_size); + + +ssize_t lzxpress_huffman_compress_talloc(TALLOC_CTX *mem_ctx, + const uint8_t *input_bytes, + size_t input_size, + uint8_t **output); + +ssize_t lzxpress_huffman_decompress(const uint8_t *input, + size_t input_size, + uint8_t *output, + size_t max_output_size); + +uint8_t *lzxpress_huffman_decompress_talloc(TALLOC_CTX *mem_ctx, + const uint8_t *input_bytes, + size_t input_size, + size_t output_size); + +/* + * lzxpress_huffman_max_compressed_size() + * + * Return the most bytes the compression can take, to allow + * pre-allocation. 
+ */ +size_t lzxpress_huffman_max_compressed_size(size_t input_size); + + +#endif /* HAVE_LZXPRESS_HUFFMAN_H */ diff --git a/lib/compression/pycompression.c b/lib/compression/pycompression.c new file mode 100644 index 0000000..5c3fd82 --- /dev/null +++ b/lib/compression/pycompression.c @@ -0,0 +1,304 @@ +/* + Samba Unix SMB/CIFS implementation. + + Python bindings for compression functions. + + Copyright (C) Petr Viktorin 2015 + Copyright (C) Douglas Bagnall 2022 + + ** NOTE! The following LGPL license applies to the talloc + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ + +#include "includes.h" +#include +#include "lib/replace/system/python.h" +#include "lzxpress.h" +#include "lzxpress_huffman.h" + +/* CompressionError is filled out in module init */ +static PyObject *CompressionError = NULL; + +static PyObject *plain_compress(PyObject *mod, PyObject *args) +{ + uint8_t *src = NULL; + Py_ssize_t src_len; + char *dest = NULL; + Py_ssize_t dest_len; + PyObject *dest_obj = NULL; + size_t alloc_len; + int ret; + + if (!PyArg_ParseTuple(args, "s#", &src, &src_len)) { + return NULL; + } + + /* + * 9/8 + 4 is the worst case growth, but we add room. + * + * alloc_len can't overflow as src_len is ssize_t while alloc_len is + * size_t. 
+ */ + alloc_len = src_len + src_len / 8 + 500; + + dest_obj = PyBytes_FromStringAndSize(NULL, alloc_len); + if (dest_obj == NULL) { + return NULL; + } + dest = PyBytes_AS_STRING(dest_obj); + + dest_len = lzxpress_compress(src, + src_len, + (uint8_t *)dest, + alloc_len); + if (dest_len < 0) { + PyErr_SetString(CompressionError, "unable to compress data"); + Py_DECREF(dest_obj); + return NULL; + } + + ret = _PyBytes_Resize(&dest_obj, dest_len); + if (ret != 0) { + /* + * Don't try to free dest_obj, as we're in deep MemoryError + * territory here. + */ + return NULL; + } + return dest_obj; +} + + +static PyObject *plain_decompress(PyObject *mod, PyObject *args) +{ + uint8_t *src = NULL; + Py_ssize_t src_len; + char *dest = NULL; + Py_ssize_t dest_len; + PyObject *dest_obj = NULL; + Py_ssize_t alloc_len = 0; + Py_ssize_t given_len = 0; + int ret; + + if (!PyArg_ParseTuple(args, "s#|n", &src, &src_len, &given_len)) { + return NULL; + } + if (given_len != 0) { + /* + * With plain decompression, we don't *need* the exact output + * size (as we do with LZ77+Huffman), but it certainly helps + * when guessing the size. + */ + alloc_len = given_len; + } else if (src_len > UINT32_MAX) { + /* + * The underlying decompress function will reject this, but by + * checking here we can give a better message and be clearer + * about overflow risks. + * + * Note, the limit is actually the smallest of UINT32_MAX and + * SSIZE_MAX, but src_len is ssize_t so it already can't + * exceed that. + */ + PyErr_Format(CompressionError, + "The maximum size for compressed data is 4GB " + "cannot decompress %zu bytes.", src_len); + } else { + /* + * The data can expand massively (though not beyond the + * 4GB limit) so we guess a big number for small inputs + * (we expect small inputs), and a relatively conservative + * number for big inputs. 
+ */ + if (src_len <= 3333333) { + alloc_len = 10000000; + } else if (src_len > UINT32_MAX / 3) { + alloc_len = UINT32_MAX; + } else { + alloc_len = src_len * 3; + } + } + + dest_obj = PyBytes_FromStringAndSize(NULL, alloc_len); + if (dest_obj == NULL) { + return NULL; + } + dest = PyBytes_AS_STRING(dest_obj); + + dest_len = lzxpress_decompress(src, + src_len, + (uint8_t *)dest, + alloc_len); + if (dest_len < 0) { + if (alloc_len == given_len) { + PyErr_Format(CompressionError, + "unable to decompress data into a buffer " + "of %zd bytes.", alloc_len); + } else { + PyErr_Format(CompressionError, + "unable to decompress data into a buffer " + "of %zd bytes. If you know the length, " + "supply it as the second argument.", + alloc_len); + } + Py_DECREF(dest_obj); + return NULL; + } + + ret = _PyBytes_Resize(&dest_obj, dest_len); + if (ret != 0) { + /* + * Don't try to free dest_obj, as we're in deep MemoryError + * territory here. + */ + return NULL; + } + return dest_obj; +} + + + +static PyObject *huffman_compress(PyObject *mod, PyObject *args) +{ + uint8_t *src = NULL; + Py_ssize_t src_len; + char *dest = NULL; + Py_ssize_t dest_len; + PyObject *dest_obj = NULL; + size_t alloc_len; + int ret; + struct lzxhuff_compressor_mem cmp_mem; + + if (!PyArg_ParseTuple(args, "s#", &src, &src_len)) { + return NULL; + } + /* + * worst case is roughly 256 per 64k or less. + * + * alloc_len won't overflow as src_len is ssize_t while alloc_len is + * size_t. 
+ */ + alloc_len = src_len + src_len / 8 + 500; + + dest_obj = PyBytes_FromStringAndSize(NULL, alloc_len); + if (dest_obj == NULL) { + return NULL; + } + dest = PyBytes_AS_STRING(dest_obj); + + dest_len = lzxpress_huffman_compress(&cmp_mem, + src, + src_len, + (uint8_t *)dest, + alloc_len); + if (dest_len < 0) { + PyErr_SetString(CompressionError, "unable to compress data"); + Py_DECREF(dest_obj); + return NULL; + } + + ret = _PyBytes_Resize(&dest_obj, dest_len); + if (ret != 0) { + return NULL; + } + return dest_obj; +} + + +static PyObject *huffman_decompress(PyObject *mod, PyObject *args) +{ + uint8_t *src = NULL; + Py_ssize_t src_len; + char *dest = NULL; + Py_ssize_t dest_len; + PyObject *dest_obj = NULL; + Py_ssize_t given_len = 0; + /* + * Here it is always necessary to supply the exact length. + */ + + if (!PyArg_ParseTuple(args, "s#n", &src, &src_len, &given_len)) { + return NULL; + } + + dest_obj = PyBytes_FromStringAndSize(NULL, given_len); + if (dest_obj == NULL) { + return NULL; + } + dest = PyBytes_AS_STRING(dest_obj); + + dest_len = lzxpress_huffman_decompress(src, + src_len, + (uint8_t *)dest, + given_len); + if (dest_len != given_len) { + PyErr_Format(CompressionError, + "unable to decompress data into a %zd bytes.", + given_len); + Py_DECREF(dest_obj); + return NULL; + } + /* no resize here */ + return dest_obj; +} + + +static PyMethodDef mod_methods[] = { + { "plain_compress", (PyCFunction)plain_compress, METH_VARARGS, + "compress bytes using lzxpress plain compression"}, + { "plain_decompress", (PyCFunction)plain_decompress, METH_VARARGS, + "decompress lzxpress plain compressed bytes"}, + { "huffman_compress", (PyCFunction)huffman_compress, METH_VARARGS, + "compress bytes using lzxpress plain compression"}, + { "huffman_decompress", (PyCFunction)huffman_decompress, METH_VARARGS, + "decompress lzxpress plain compressed bytes"}, + {0} +}; + + +#define MODULE_DOC PyDoc_STR("LZXpress compression/decompression bindings") + +static struct PyModuleDef 
moduledef = { + PyModuleDef_HEAD_INIT, + .m_name = "compression", + .m_doc = MODULE_DOC, + .m_size = -1, + .m_methods = mod_methods, +}; + + +static PyObject *module_init(void) +{ + PyObject *m = PyModule_Create(&moduledef); + if (m == NULL) { + return NULL; + } + + CompressionError = PyErr_NewException( + "compression.CompressionError", + PyExc_Exception, + NULL); + PyModule_AddObject(m, "CompressionError", CompressionError); + + return m; +} + +PyMODINIT_FUNC PyInit_compression(void); +PyMODINIT_FUNC PyInit_compression(void) +{ + return module_init(); +} diff --git a/lib/compression/tests/scripts/README b/lib/compression/tests/scripts/README new file mode 100644 index 0000000..aff1047 --- /dev/null +++ b/lib/compression/tests/scripts/README @@ -0,0 +1,19 @@ +Tools used in the development and testing the LZ77 + Huffman code. + +These might not be of use to anyone ever, but here they are. + +make-fuzz-examples +encodes compressed files for decompression fuzzer. + +decode-huffman-header +print Huffman codes and validate first header in a compressed file. + +three-byte-hash +check that a three byte hash works. + +make-test-vectors +make files with randomly unbalanced symbol distributions. + +generate-windows-test-vectors.c +if compiled under Cygwin or similar on Windows, this can be used to +generate and verify test vectors. diff --git a/lib/compression/tests/scripts/decode-huffman-header b/lib/compression/tests/scripts/decode-huffman-header new file mode 100755 index 0000000..9c1279b --- /dev/null +++ b/lib/compression/tests/scripts/decode-huffman-header @@ -0,0 +1,54 @@ +#!/usr/bin/python3 +"""Print the codes in the first Huffman tree header in the given file. + +USAGE: decode-huffman-header FILE + +The number of codes of different length is printed first, followed by +the implied total frequency of codes, followed by the deduced codes. + +If the total is not 1.0, the header is invalid. 
+""" + +import sys + + +if '--help' in sys.argv or '-h' in sys.argv or len(sys.argv) != 2: + print(__doc__) + exit(len(sys.argv) != 2) + + +def read_table(data): + lengths = [[] for x in range(16)] + for i, b in enumerate(data): + even = b & 15 + odd = b >> 4 + lengths[even].append(i * 2) + lengths[odd].append(i * 2 + 1) + + code = 0 + + total = 0.0 + for i, bucket in enumerate(lengths): + if bucket and i: + portion = 1.0 / (1 << i) * len(bucket) + total += portion + print(f"length {i:2}: {len(bucket):4} ({portion})") + print(f"total {total}") + + for i, bucket in enumerate(lengths): + if i == 0: + continue + code <<= 1 + for c in bucket: + print(f'{c:03x} {code:0{i}b}') + code += 1 + + +def main(): + fn = sys.argv[1] + with open(fn, 'rb') as f: + data = f.read(256) + read_table(data) + + +main() diff --git a/lib/compression/tests/scripts/generate-windows-test-vectors.c b/lib/compression/tests/scripts/generate-windows-test-vectors.c new file mode 100644 index 0000000..28724d2 --- /dev/null +++ b/lib/compression/tests/scripts/generate-windows-test-vectors.c @@ -0,0 +1,206 @@ +/* + * Generate test vectorsa for Windows LZ77 Huffman compression. + * + * Copyright (c) 2022 Douglas Bagnall + * + * GPLv3+. + * + * Can be compiled on Windows 2012r2 under Cygwin + * + * gcc -o generate-windows-test-vectors \ + * generate-windows-test-vectors.c \ + * C:\Windows\SysWOW64\cabinet.dll \ + * -lcabinet + * + * There might be better ways. + * + * See https://learn.microsoft.com/en-us/windows/win32/cmpapi/-compression-portal + */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +/* compressapi.h is in the Windows API. mingw-w64 has a copy. 
*/ +#include +#include + +struct blob { + uint8_t *data; + size_t length; +}; + +/* Windows size_t is different than Cygwin size_t (though still 64 bit) */ +typedef unsigned long long wsize_t; + + +#define compression_flags (COMPRESS_ALGORITHM_XPRESS_HUFF | COMPRESS_RAW) + +int32_t compression_level = 0; + +static struct blob compress(struct blob input) +{ + COMPRESSOR_HANDLE handle; + struct blob output; + bool ok; + wsize_t used; + + ok = CreateCompressor(compression_flags, NULL, &handle); + + if (! ok) { + fprintf(stderr, "CreateCompressor failed\n"); + exit(1); + } + + output.length = input.length * 3 + 256; + output.data = malloc(output.length); + if (output.data == NULL) { + fprintf(stderr, "output allocation failed (estimated %zu)\n", + output.length); + exit(1); + } + + + ok = SetCompressorInformation(handle, + COMPRESS_INFORMATION_CLASS_LEVEL, + &compression_level, + sizeof(compression_level)); + + if (! ok) { + fprintf(stderr, "SetCompressorInformation failed: %d\n", + GetLastError()); + //exit(1); + } + + ok = Compress(handle, + input.data, + input.length, + output.data, + output.length, + &used); + if (! ok) { + fprintf(stderr, "Compress failed\n"); + exit(1); + } + output.data = realloc(output.data, used); + if (output.data == NULL) { + fprintf(stderr, + "failed to shrinkwrap output! (from %zu to %llu)\n", + output.length, used); + exit(1); + } + output.length = used; + CloseCompressor(handle); + return output; +} + + +struct blob decompress(struct blob input, + size_t expected_size) +{ + DECOMPRESSOR_HANDLE handle; + struct blob output; + bool ok; + wsize_t used; + + ok = CreateDecompressor(compression_flags, NULL, &handle); + + if (! 
ok) { + fprintf(stderr, "CreateDecompressor failed\n"); + exit(1); + } + + output.length = expected_size; + output.data = malloc(output.length); + if (output.data == NULL) { + fprintf(stderr, "output allocation failed (%zu)\n", + output.length); + exit(1); + } + + ok = Decompress(handle, + input.data, + input.length, + output.data, + output.length, + &used); + if (! ok) { + fprintf(stderr, "Decompress failed\n"); + exit(1); + } + CloseDecompressor(handle); + return output; +} + + +static void __attribute__((noreturn)) usage(int ret) +{ + fprintf(stderr, + "USAGE: test-win-vectors {c,d} filename [length|level] > DEST\n\n"); + fprintf(stderr, "c for< compression, d for decompression\n"); + fprintf(stderr, "decompressed length is required for decompression\n"); + fprintf(stderr, "compression level flag is optional [default 0]\n"); + exit(ret); +} + +int main(int argc, const char *argv[]) +{ + FILE *fh; + const char *filename; + struct stat s; + int ret; + struct blob input = {0}; + struct blob output = {0}; + + if (argc < 3 || argc > 4) { + usage(1); + } + filename = argv[2]; + + fh = fopen(filename, "rb"); + if (fh == NULL) { + fprintf(stderr, "Could not open %s\n", filename); + usage(1); + } + + ret = fstat(fileno(fh), &s); + if (ret != 0) { + fprintf(stderr, "Could not stat %s: %d\n", filename, ret); + usage(1); + } + input.length = s.st_size; + input.data = malloc(input.length); + if (input.data == NULL) { + fprintf(stderr, "input too big for memory?! 
(%zu)\n", + s.st_size); + exit(1); + } + + fread(input.data, 1, input.length, fh); + + if (strcmp(argv[1], "c") == 0) { + if (argc == 4 && strcmp(argv[3], "0")) { + compression_level = 1; + } + output = compress(input); + } else if (strcmp(argv[1], "d") == 0) { + size_t decomp_size; + if (argc != 4) { + fprintf(stderr, "no length given\n"); + usage(1); + } + decomp_size = atoi(argv[3]); + output = decompress(input, decomp_size); + } else { + usage(1); + } + fwrite(output.data, 1, output.length, stdout); + free(output.data); + return 0; +} diff --git a/lib/compression/tests/scripts/make-fuzz-examples b/lib/compression/tests/scripts/make-fuzz-examples new file mode 100755 index 0000000..09200fb --- /dev/null +++ b/lib/compression/tests/scripts/make-fuzz-examples @@ -0,0 +1,45 @@ +#!/usr/bin/python3 +# +"""Pack the compressed files created by test_lzx_huffman.c (with +LZXHUFF_DEBUG_FILES) into the format used by the decompression fuzzer. + +That is, the first 3 bytes are the length of the decompressed file, +and the rest of the file is the compressed data. + +USAGE: make-fuzz-examples DIR + +where DIR is probably '/tmp'. 
+""" +import os +import sys + + +if '--help' in sys.argv or '-h' in sys.argv or len(sys.argv) != 2: + print(__doc__) + exit(len(sys.argv) != 2) + + +def main(): + files = set(os.listdir(sys.argv[1])) + + for fn in files: + if fn.endswith('-compressed'): + fn2 = fn.replace('-compressed', '-decompressed') + if fn2 not in files: + print(f"skipping {fn}, no {fn2}") + continue + cfn = '/tmp/' + fn + dfn = '/tmp/' + fn2 + wfn = '/tmp/' + fn.replace('-compressed', '.fuzz') + + size = os.stat(dfn).st_size + sbytes = bytes([(size & 0xff), (size >> 8) & 0xff, (size >> 16) & 0xff]) + + with open(cfn, 'rb') as f: + s = f.read() + + with open(wfn, 'wb') as f: + s = f.write(sbytes + s) + + +main() diff --git a/lib/compression/tests/scripts/make-test-vectors b/lib/compression/tests/scripts/make-test-vectors new file mode 100755 index 0000000..6f25866 --- /dev/null +++ b/lib/compression/tests/scripts/make-test-vectors @@ -0,0 +1,185 @@ +#!/usr/bin/python3 +"""Generate a few strings with unbalanced distributions to test the +regeneration of the Huffman tree when it gets too deep. + +USAGE: make-test-vectors DIR + +This will fill up DIR with test files. 
+""" +import sys +import random +from collections import defaultdict + + +if '--help' in sys.argv or '-h' in sys.argv or len(sys.argv) != 2: + print(__doc__) + exit(len(sys.argv) != 2) + + +DIR = sys.argv[1] + +SIZE = (1 << 17) + (23) # two and a bit blocks +SIZE_NAME = "128k+" +# SIZE = (1 << 16) +# SIZE_NAME = "64" + + +random.seed(1) + + +def squares(n): + array = [] + for i in range(n): + a = random.random() + b = random.random() + array.append(int(a * b * 256)) + return bytes(array) + + +def skewed_choices(n): + b = list(range(256)) + array = random.choices(b, weights=b, k=n) + return bytes(array) + + +def fib_shuffle(n): + array = [] + a, b = 1, 1 + for i in range(100): + array.extend([i] * a) + a, b = a + b, a + if len(array) > 1000000: + break + random.shuffle(array) + return bytes(array[:n]) + + +def exp_shuffle(n): + array = [] + for i in range(256): + array.extend([i] * int(1.04 ** i)) + if len(array) > 1000000: + break + random.shuffle(array) + return bytes(array[:n]) + + +def and_rand(n): + array = [] + for i in range(n): + a = random.randrange(256) + b = random.randrange(256) + array.append(a & b) + return bytes(array) + + +def betavar(n, a, b): + array = [] + for i in range(n): + x = random.betavariate(a, b) + array.append(int(x * 255.999999999999)) + return bytes(array) + + +def repeated_alphabet(n): + a = b'abcdefghijklmnopqrstuvwxyz' + na = n // len(a) + 1 + s = a * na + return s[:n] + + +def decayed_alphabet(n): + s = list(repeated_alphabet(n)) + for i in range(256): + j = random.randrange(n) + s[j] = i + + return bytes(s) + + +def trigram_model(n): + with open(__file__, 'rb') as f: + data = f.read() + lut = defaultdict(list) + for a, b, c in zip(data, data[1:], data[2:]): + k = bytes([a, b]) + lut[k].append(c) + + k = random.choice(list(lut.keys())) + s = [] + p = k[1] + for i in range(n + 10): + c = random.choice(lut[k]) + s.append(c) + k = bytes([p, c]) + p = c + + return bytes(s[10:]) + + +def trigram_sum_model(n): + with open(__file__, 'rb') 
as f: + data = f.read() + lut = [[random.randrange(256)] for i in range(512)] + for a, b, c in zip(data, data[1:], data[2:]): + lut[a + b].append(c) + + s = [] + i = random.randrange(len(data) - 1) + a = data[i] + b = data[i + 1] + + for i in range(n + 10): + x = lut[a + b] + c = random.choice(x) + s.append(c) + a = b + b = c + + return bytes(s[10:]) + + +def the_classics(): + # this used to be main() + sq = squares(SIZE) + ch = skewed_choices(SIZE) + fs = fib_shuffle(SIZE) + es = exp_shuffle(SIZE) + ar = and_rand(SIZE) + bv1 = betavar(SIZE, 0.1, 1.5) + bv2 = betavar(SIZE, 0.5, 2.0) + bv3 = betavar(SIZE, 0.05, 0.05) + + print("n sq ch fs es") + for i in range(256): + print(f"{i:3} {sq.count(i):5} {ch.count(i):5} " + f"{fs.count(i):5} {es.count(i):5}" + f"{ar.count(i):5} {bv1.count(i):5}" + f"{bv2.count(i):5} {bv3.count(i):5}" + ) + + for series, fn in ((sq, "square_series"), + (ch, "skewed_choices"), + (fs, "fib_shuffle"), + (es, "exp_shuffle"), + (ar, "and_rand"), + (bv1, "beta-variate1"), + (bv2, "beta-variate2"), + (bv3, "beta-variate3"), + ): + with open(f"{DIR}/{fn}-{SIZE_NAME}", "wb") as f: + f.write(series) + + +def main(): + if True: + the_classics() + for series, fn in ((decayed_alphabet(SIZE), "decayed_alphabet"), + (trigram_model(SIZE), "trigram"), + (trigram_sum_model(SIZE), "trigram_sum"), + ): + with open(f"{DIR}/{fn}_{SIZE_NAME}", "wb") as f: + f.write(series) + + +main() diff --git a/lib/compression/tests/scripts/three-byte-hash b/lib/compression/tests/scripts/three-byte-hash new file mode 100755 index 0000000..100d0bc --- /dev/null +++ b/lib/compression/tests/scripts/three-byte-hash @@ -0,0 +1,49 @@ +#!/usr/bin/python3 +"""Print statistics about a certain three byte hash. 
+ +USAGE: three_byte_hash +""" +import sys + +if '--help' in sys.argv or '-h' in sys.argv or len(sys.argv) > 1: + print(__doc__) + exit(not ('--help' in sys.argv or '-h' in sys.argv)) + + +from statistics import mean, pstdev, median + + +def h(*args, bits=12): + a = args[0] + b = args[1] ^ 0x2e + c = args[2] ^ 0x55 + d = ((a + b) << 8) ^ (((c - a) & 0xffff) << 5) ^ (c + b) ^ (0xcab + a) + return d & ((1 << bits) - 1) + + +def count(fn, bits, filter=None): + counts = [0] * (1 << bits) + for i in range(256 ** 3): + a, b, c = i & 255, (i >> 8) & 255, i >> 16 + if filter and not (filter(a) and filter(b) and filter(c)): + continue + + h = fn(a, b, c, bits=bits) + counts[h] += 1 + + print(f" {bits} bits; {len(counts)} buckets, " + f"expected {(1<<24) / len(counts)}") + print(f"median {median(counts)}") + print(f"mean {mean(counts)}") + print(f"min {min(counts)}") + print(f"max {max(counts)}") + print(f"stddev {pstdev(counts)}") + + +for b in (12, 13, 14): + count(h, b) + + print("With ASCII filter") + letters = set(range(32, 127)) + letters |= set(b'\r\n\t\0') + count(h, b, filter=letters.__contains__) diff --git a/lib/compression/tests/test_lzx_huffman.c b/lib/compression/tests/test_lzx_huffman.c new file mode 100644 index 0000000..7770535 --- /dev/null +++ b/lib/compression/tests/test_lzx_huffman.c @@ -0,0 +1,1255 @@ +/* + * Samba compression library - LGPLv3 + * + * Copyright © Catalyst IT 2022 + * + * Written by Douglas Bagnall + * + * ** NOTE! The following LGPL license applies to this file. + * ** It does NOT imply that all of Samba is released under the LGPL + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include +#include +#include +#include +#include +#include +#include "replace.h" +#include +#include "lzxpress_huffman.h" +#include "lib/util/stable_sort.h" +#include "lib/util/data_blob.h" + +/* set LZXHUFF_DEBUG_FILES to true to save round-trip files in /tmp. */ +#define LZXHUFF_DEBUG_FILES false + +/* set LZXHUFF_DEBUG_VERBOSE to true to print more. */ +#define LZXHUFF_DEBUG_VERBOSE false + + +#if LZXHUFF_DEBUG_VERBOSE +#define debug_message(...) print_message(__VA_ARGS__) + +#include + +struct timespec start = {0}; +struct timespec end = {0}; +static void debug_start_timer(void) +{ + clock_gettime(CLOCK_MONOTONIC, &start); +} + +static void debug_end_timer(const char *name, size_t len) +{ + uint64_t ns; + double secs; + double rate; + clock_gettime(CLOCK_MONOTONIC, &end); + ns = end.tv_nsec; + ns += end.tv_sec * 1000 * 1000 * 1000; + ns -= start.tv_nsec; + ns -= start.tv_sec * 1000 * 1000 * 1000; + secs = ns / 1e9; + rate = len / (secs * 1024 * 1024); + debug_message("%s %zu bytes in %.2g: \033[1;35m%.2f\033[0m MB per second\n", + name, len, secs, rate); +} + +#else +#define debug_message(...) /* debug_message */ +#define debug_start_timer(...) /* debug_start_timer */ +#define debug_end_timer(...) 
/* debug_end_timer */ +#endif + + +struct lzx_pair { + const char *name; + DATA_BLOB compressed; + DATA_BLOB decompressed; +}; + +struct lzx_file_pair { + const char *name; + const char *compressed_file; + const char *decompressed_file; +}; + + +#define DECOMP_DIR "testdata/compression/decompressed" +#define COMP_DIR "testdata/compression/compressed-huffman" +#define MORE_COMP_DIR "testdata/compression/compressed-more-huffman" + + +#define VARRGH(...) __VA_ARGS__ + +#define BLOB_FROM_ARRAY(...) \ + { \ + .data = (uint8_t[]){__VA_ARGS__}, \ + .length = sizeof((uint8_t[]){__VA_ARGS__}) \ + } + +#define BLOB_FROM_STRING(s) \ + { \ + .data = discard_const_p(uint8_t, s), \ + .length = (sizeof(s) - 1) \ + } + + +const char * file_names[] = { + "27826-8.txt", + "5d049b4cb1bd933f5e8ex19", + "638e61e96d54279981c3x5", + "64k-minus-one-zeros", + "64k-plus-one-zeros", + "64k-zeros", + "96f696a4e5ce56c61a3dx10", + "9e0b6a12febf38e98f13", + "abc-times-101", + "abc-times-105", + "abc-times-200", + "and_rand", + "and_rand-128k+", + "b63289ccc7f218c0d56b", + "beta-variate1-128k+", + "beta-variate2-128k+", + "beta-variate3-128k+", + "decayed_alphabet_128k+", + "decayed_alphabet_64k", + "exp_shuffle", + "exp_shuffle-128k+", + "f00842317dc6d5695b02", + "fib_shuffle", + "fib_shuffle-128k+", + "fuzzing-0fc2d461b56cd8103c91", + "fuzzing-17c961778538cc10ab7c", + "fuzzing-3591f9dc02bb00a54b60", + "fuzzing-3ec3bca27bb9eb40c128", + "fuzzing-80b4fa18ff5f8dd04862", + "fuzzing-a3115a81d1ac500318f9", + "generate-windows-test-vectors.c", + "midsummer-nights-dream.txt", + "notes-on-the-underground.txt", + "pg22009.txt", + "repeating", + "repeating-exactly-64k", + "setup.log", + "skewed_choices", + "skewed_choices-128k+", + /* These ones were deathly slow in fuzzing at one point */ + "slow-015ddc36a71412ccc50d", + "slow-100e9f966a7feb9ca40a", + "slow-2a671c3cff4f1574cbab", + "slow-33d90a24e70515b14cd0", + "slow-49d8c05261e3f412fc72", + "slow-50a249d2fe56873e56a0", + "slow-63e9f0b52235fb0129fa", + 
"slow-73b7f971d65908ac0095", + "slow-8b61e3dd267908544531", + "slow-9d1c5a079b0462986f1f", + "slow-aa7262a821dabdcf04a6", + "slow-b8a91d142b0d2af7f5ca", + "slow-c79142457734bbc8d575", + "slow-d736544545b90d83fe75", + "slow-e3b9bdfaed7d1a606fdb", + "slow-f3f1c02a9d006e5e1703", + "square_series", + "square_series-128k+", + "trigram_128k+", + "trigram_64k", + "trigram_sum_128k+", + "trigram_sum_64k", + NULL +}; + +struct lzx_pair bidirectional_pairs[] = { + + {.name = "abc__100_repeats", /* [MS-XCA] 3.2 example 2. */ + .decompressed = BLOB_FROM_STRING( + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + ), + .compressed = BLOB_FROM_ARRAY( + /* + * The 'a', 'b', and 'c' bytes are 0x61, 0x62, 0x63. No other + * symbols occur. That means we need 48 0x00 bytes for the + * first 96 symbol-nybbles, then some short codes, then zeros + * again for the rest of the literals. + */ + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0, + 0x30, 0x23, /* a_ cb */ + 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, /* 100 bytes */ + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0, /* here end the 0-255 literals (128 bytes) */ + 0x02, /* 'EOF' symbol 256 (byte 128 low) */ + 0,0,0,0,0, 0,0,0,0,0, 0, /* 140 bytes */ + 0,0,0, + 0x20, /* codepoint 287 (byte 143 high) */ + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0, /* 160 bytes */ + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, /* 240 bytes */ + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0, + /* + * So that's the tree. 
+ * + * length 2 codes for 'c', EOF, 287 + * length 3 for 'a', 'b'. + * + * c: 00 + * EOF: 01 + * 287: 10 + * a: 110 + * b: 111 + * + * thus the literal string "abc" is 110-111-00. + * + * Now for the lz77 match definitions for EOF and 287. + * + * Why 287? It encodes the match distance and offset. + * + * 287 - 256 = 31 + * + * _length = 31 % 16 = 15 + * _distance = 31 / 16 = 1 + * + * (it's easier to look at the hex, 0x11f: + * 1xx means a match; x1x is _distance; xxf is _length) + * + * _distance 1 means a two bit distance (10 or 11; 2 or 3). + * That means the next bit will be the least significant bit + * of distance (1 in this case, meaning distance 3). + * + * if _length is 15, real length is included inline. + * + * 'EOF' == 256 means _length = 0, _distance = 0. + * + * _distance 0 means 1, so no further bits needed. + * _length 0 means length 3. + * + * but when used as EOF, this doesn't matter. + */ + 0xa8, 0xdc, 0x00, 0x00, 0xff, 0x26, 0x01 + /* These remaining bytes are: + * + * 10101000 11011100 00000000 00000000 11111111 + * 00100110 00000001 + * + * and we read them as 16 chunks (i.e. flipping odd/even order) + * + * 110-111-00 10-1-01-000 + * a b c 287 | EOF + * | + * this is the 287 distance low bit. + * + * The last 3 bits are not used. The 287 length is sort of + * out of band, coming up soon (because 287 encodes length + * 15; most codes do not do this). + * + * 00000000 00000000 + * + * This padding is there because the first 32 bits are read + * at the beginning of decoding. If there were more things to + * be encoded, they would be in here. + * + * 11111111 + * + * This byte is pulled as the length for the 287 match. + * Because it is 0xff, we pull a further 2 bytes for the + * actual length, i.e. a 16 bit number greater than 270. + * + * 00000001 00100110 + * + * that is 0x126 = 294 = the match length - 3 (because we're + * encoding ["abc", , EOF]). + * + */ + ) + }, + {.name = "abcdefghijklmnopqrstuvwxyz", /* [MS-XCA] 3.2 example 1. 
*/ + .decompressed = BLOB_FROM_STRING("abcdefghijklmnopqrstuvwxyz"), + .compressed = BLOB_FROM_ARRAY( + /* + * In this case there are no matches encoded as there are no + * repeated symbols. Including the EOF, there are 27 symbols + * all occurring exactly as frequently as each other (once). + * From that we would expect the codes to be mostly 5 bits + * long, because 27 < 2^5 (32), but greater than 2^4. And + * that's what we see. + */ + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0, + /* 14 non-zero bytes for 26 letters/nibbles */ + 0x50, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x45, 0x44, 0x04, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0, /* 80 */ + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0, + 0x04, /* 0x100 EOF */ + /* no matches */ + 0,0,0,0,0, 0,0,0,0,0, 0, /* 140 */ + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, /* 240 */ + 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0, + + 0xd8, 0x52, 0x3e, 0xd7, 0x94, 0x11, 0x5b, 0xe9, + 0x19, 0x5f, 0xf9, 0xd6, 0x7c, 0xdf, 0x8d, 0x04, + 0x00, 0x00, 0x00, 0x00) + }, + {0} +}; + + +static void test_lzxpress_huffman_decompress(void **state) +{ + size_t i; + ssize_t written; + uint8_t *dest = NULL; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + for (i = 0; bidirectional_pairs[i].name != NULL; i++) { + struct lzx_pair p = bidirectional_pairs[i]; + dest = talloc_array(mem_ctx, uint8_t, p.decompressed.length); + + debug_message("%s compressed %zu decomp %zu\n", p.name, + p.compressed.length, + p.decompressed.length); + + written = lzxpress_huffman_decompress(p.compressed.data, + p.compressed.length, + dest, + p.decompressed.length); + assert_int_not_equal(written, -1); + assert_int_equal(written, 
p.decompressed.length); + + assert_memory_equal(dest, p.decompressed.data, p.decompressed.length); + talloc_free(dest); + } +} + +static void test_lzxpress_huffman_compress(void **state) +{ + size_t i; + ssize_t written; + uint8_t *dest = NULL; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + for (i = 0; bidirectional_pairs[i].name != NULL; i++) { + struct lzx_pair p = bidirectional_pairs[i]; + debug_message("%s compressed %zu decomp %zu\n", p.name, + p.compressed.length, + p.decompressed.length); + + written = lzxpress_huffman_compress_talloc(mem_ctx, + p.decompressed.data, + p.decompressed.length, + &dest); + + assert_int_not_equal(written, -1); + assert_int_equal(written, p.compressed.length); + assert_memory_equal(dest, p.compressed.data, p.compressed.length); + talloc_free(dest); + } +} + + +static DATA_BLOB datablob_from_file(TALLOC_CTX *mem_ctx, + const char *filename) +{ + DATA_BLOB b = {0}; + FILE *fh = fopen(filename, "rb"); + int ret; + struct stat s; + size_t len; + if (fh == NULL) { + debug_message("could not open '%s'\n", filename); + return b; + } + ret = fstat(fileno(fh), &s); + if (ret != 0) { + fclose(fh); + return b; + } + b.data = talloc_array(mem_ctx, uint8_t, s.st_size); + if (b.data == NULL) { + fclose(fh); + return b; + } + len = fread(b.data, 1, s.st_size, fh); + if (ferror(fh) || len != s.st_size) { + TALLOC_FREE(b.data); + } else { + b.length = len; + } + fclose(fh); + return b; +} + + + +static void test_lzxpress_huffman_decompress_files(void **state) +{ + size_t i; + int score = 0; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + for (i = 0; file_names[i] != NULL; i++) { + char filename[200]; + uint8_t *dest = NULL; + ssize_t written; + TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx); + struct lzx_pair p = { + .name = file_names[i] + }; + + debug_message("%s\n", p.name); + + snprintf(filename, sizeof(filename), + "%s/%s.decomp", DECOMP_DIR, p.name); + + p.decompressed = datablob_from_file(tmp_ctx, filename); + assert_non_null(p.decompressed.data); + + 
snprintf(filename, sizeof(filename), + "%s/%s.lzhuff", COMP_DIR, p.name); + + p.compressed = datablob_from_file(tmp_ctx, filename); + assert_non_null(p.compressed.data); + + dest = talloc_array(tmp_ctx, uint8_t, p.decompressed.length); + debug_start_timer(); + written = lzxpress_huffman_decompress(p.compressed.data, + p.compressed.length, + dest, + p.decompressed.length); + debug_end_timer("decompress", p.decompressed.length); + if (written != -1 && + written == p.decompressed.length && + memcmp(dest, p.decompressed.data, p.decompressed.length) == 0) { + debug_message("\033[1;32mdecompressed %s!\033[0m\n", p.name); + score++; + } else { + debug_message("\033[1;31mfailed to decompress %s!\033[0m\n", + p.name); + debug_message("size %zd vs reference %zu\n", + written, p.decompressed.length); + } + talloc_free(tmp_ctx); + } + debug_message("%d/%zu correct\n", score, i); + assert_int_equal(score, i); +} + + +static void test_lzxpress_huffman_decompress_more_compressed_files(void **state) +{ + /* + * This tests the decompression of files that have been compressed on + * Windows with the level turned up (to 1, default for MS-XCA is 0). + * + * The format is identical, but it will have tried harder to find + * matches. 
+ */ + size_t i; + int score = 0; + int found = 0; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + for (i = 0; file_names[i] != NULL; i++) { + char filename[200]; + uint8_t *dest = NULL; + ssize_t written; + TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx); + struct lzx_pair p = { + .name = file_names[i] + }; + + debug_message("%s\n", p.name); + + snprintf(filename, sizeof(filename), + "%s/%s.decomp", DECOMP_DIR, p.name); + + p.decompressed = datablob_from_file(tmp_ctx, filename); + assert_non_null(p.decompressed.data); + + snprintf(filename, sizeof(filename), + "%s/%s.lzhuff", MORE_COMP_DIR, p.name); + + p.compressed = datablob_from_file(tmp_ctx, filename); + if (p.compressed.data == NULL) { + /* + * We don't have all the vectors in the + * more-compressed directory, which is OK, we skip + * them. + */ + continue; + } + found++; + dest = talloc_array(tmp_ctx, uint8_t, p.decompressed.length); + debug_start_timer(); + written = lzxpress_huffman_decompress(p.compressed.data, + p.compressed.length, + dest, + p.decompressed.length); + debug_end_timer("decompress", p.decompressed.length); + if (written == p.decompressed.length && + memcmp(dest, p.decompressed.data, p.decompressed.length) == 0) { + debug_message("\033[1;32mdecompressed %s!\033[0m\n", p.name); + score++; + } else { + debug_message("\033[1;31mfailed to decompress %s!\033[0m\n", + p.name); + debug_message("size %zd vs reference %zu\n", + written, p.decompressed.length); + } + talloc_free(tmp_ctx); + } + debug_message("%d/%d correct\n", score, found); + assert_int_equal(score, found); +} + + +/* + * attempt_round_trip() tests whether a data blob can survive a compression + * and decompression cycle. If save_name is not NULL and LZXHUFF_DEBUG_FILES + * evals to true, the various stages are saved in files with that name and the + * '-original', '-compressed', and '-decompressed' suffixes. If ref_compressed + * has data, it'll print a message saying whether the compressed data matches + * that. 
+ */ + +static ssize_t attempt_round_trip(TALLOC_CTX *mem_ctx, + DATA_BLOB original, + const char *save_name, + DATA_BLOB ref_compressed) +{ + TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx); + DATA_BLOB compressed = data_blob_talloc(tmp_ctx, NULL, + original.length * 4 / 3 + 260); + DATA_BLOB decompressed = data_blob_talloc(tmp_ctx, NULL, + original.length); + ssize_t comp_written, decomp_written; + debug_start_timer(); + comp_written = lzxpress_huffman_compress_talloc(tmp_ctx, + original.data, + original.length, + &compressed.data); + debug_end_timer("compress", original.length); + if (comp_written <= 0) { + talloc_free(tmp_ctx); + return -1; + } + + if (ref_compressed.data != NULL) { + /* + * This is informational, not an assertion; there are + * ~infinite legitimate ways to compress the data, many as + * good as each other (think of compression as a language, not + * a format). + */ + debug_message("compressed size %zd vs reference %zu\n", + comp_written, ref_compressed.length); + + if (comp_written == compressed.length && + memcmp(compressed.data, ref_compressed.data, comp_written) == 0) { + debug_message("\033[1;32mbyte identical!\033[0m\n"); + } + } + debug_start_timer(); + decomp_written = lzxpress_huffman_decompress(compressed.data, + comp_written, + decompressed.data, + original.length); + debug_end_timer("decompress", original.length); + if (save_name != NULL && LZXHUFF_DEBUG_FILES) { + char s[300]; + FILE *fh = NULL; + + snprintf(s, sizeof(s), "%s-original", save_name); + fprintf(stderr, "Saving %zu bytes to %s\n", original.length, s); + fh = fopen(s, "w"); + fwrite(original.data, 1, original.length, fh); + fclose(fh); + + snprintf(s, sizeof(s), "%s-compressed", save_name); + fprintf(stderr, "Saving %zu bytes to %s\n", comp_written, s); + fh = fopen(s, "w"); + fwrite(compressed.data, 1, comp_written, fh); + fclose(fh); + /* + * We save the decompressed file using original.length, not + * the returned size. If these differ, the returned size will + * be -1. 
By saving the whole buffer we can see at what point + * it went haywire. + */ + snprintf(s, sizeof(s), "%s-decompressed", save_name); + fprintf(stderr, "Saving %zu bytes to %s\n", original.length, s); + fh = fopen(s, "w"); + fwrite(decompressed.data, 1, original.length, fh); + fclose(fh); + } + + if (original.length != decomp_written || + memcmp(decompressed.data, + original.data, + original.length) != 0) { + debug_message("\033[1;31mgot %zd, expected %zu\033[0m\n", + decomp_written, + original.length); + talloc_free(tmp_ctx); + return -1; + } + talloc_free(tmp_ctx); + return comp_written; +} + + +static void test_lzxpress_huffman_round_trip(void **state) +{ + size_t i; + int score = 0; + ssize_t compressed_total = 0; + ssize_t reference_total = 0; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + for (i = 0; file_names[i] != NULL; i++) { + char filename[200]; + char *debug_files = NULL; + TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx); + ssize_t comp_size; + struct lzx_pair p = { + .name = file_names[i] + }; + debug_message("-------------------\n"); + debug_message("%s\n", p.name); + + snprintf(filename, sizeof(filename), + "%s/%s.decomp", DECOMP_DIR, p.name); + + p.decompressed = datablob_from_file(tmp_ctx, filename); + assert_non_null(p.decompressed.data); + + snprintf(filename, sizeof(filename), + "%s/%s.lzhuff", COMP_DIR, p.name); + + p.compressed = datablob_from_file(tmp_ctx, filename); + if (p.compressed.data == NULL) { + debug_message( + "Could not load %s reference file %s\n", + p.name, filename); + debug_message("%s decompressed %zu\n", p.name, + p.decompressed.length); + } else { + debug_message("%s: reference compressed %zu decomp %zu\n", + p.name, + p.compressed.length, + p.decompressed.length); + } + if (1) { + /* + * We're going to save copies in /tmp. 
+ */ + snprintf(filename, sizeof(filename), + "/tmp/lzxhuffman-%s", p.name); + debug_files = filename; + } + + comp_size = attempt_round_trip(mem_ctx, p.decompressed, + debug_files, + p.compressed); + if (comp_size > 0) { + debug_message("\033[1;32mround trip!\033[0m\n"); + score++; + if (p.compressed.length) { + compressed_total += comp_size; + reference_total += p.compressed.length; + } + } + talloc_free(tmp_ctx); + } + debug_message("%d/%zu correct\n", score, i); + print_message("\033[1;34mtotal compressed size: %zu\033[0m\n", + compressed_total); + print_message("total reference size: %zd \n", reference_total); + print_message("diff: %7zd \n", + reference_total - compressed_total); + assert_true(reference_total != 0); + print_message("ratio: \033[1;3%dm%.2f\033[0m \n", + 2 + (compressed_total >= reference_total), + ((double)compressed_total) / reference_total); + /* + * Assert that the compression is *about* as good as Windows. Of course + * it doesn't matter if we do better, but mysteriously getting better + * is usually a sign that something is wrong. + * + * At the time of writing, compressed_total is 2674004, or 10686 more + * than the Windows reference total. That's < 0.5% difference, we're + * asserting at 2%. + */ + assert_true(labs(compressed_total - reference_total) < + compressed_total / 50); + + assert_int_equal(score, i); + talloc_free(mem_ctx); +} + +/* + * Bob Jenkins' Small Fast RNG. + * + * We don't need it to be this good, but we do need it to be reproduceable + * across platforms, which rand() etc aren't. 
+ * + * http://burtleburtle.net/bob/rand/smallprng.html + */ + +struct jsf_rng { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; +}; + +#define ROTATE32(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) + +static uint32_t jsf32(struct jsf_rng *x) { + uint32_t e = x->a - ROTATE32(x->b, 27); + x->a = x->b ^ ROTATE32(x->c, 17); + x->b = x->c + x->d; + x->c = x->d + e; + x->d = e + x->a; + return x->d; +} + +static void jsf32_init(struct jsf_rng *x, uint32_t seed) { + size_t i; + x->a = 0xf1ea5eed; + x->b = x->c = x->d = seed; + for (i = 0; i < 20; ++i) { + jsf32(x); + } +} + + +static void test_lzxpress_huffman_long_gpl_round_trip(void **state) +{ + /* + * We use a kind of model-free Markov model to generate a massively + * extended pastiche of the GPLv3 (chosen because it is right there in + * "COPYING" and won't change often). + * + * The point is to check a round trip of a very long message with + * multiple repetitions on many scales, without having to add a very + * large file. + */ + size_t i, j, k; + uint8_t c; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB gpl = datablob_from_file(mem_ctx, "COPYING"); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024); + DATA_BLOB ref = {0}; + ssize_t comp_size; + struct jsf_rng rng; + + if (gpl.data == NULL) { + print_message("could not read COPYING\n"); + fail(); + } + + jsf32_init(&rng, 1); + + j = 1; + original.data[0] = gpl.data[0]; + for (i = 1; i < original.length; i++) { + size_t m; + char p = original.data[i - 1]; + c = gpl.data[j]; + original.data[i] = c; + j++; + m = (j + jsf32(&rng)) % (gpl.length - 50); + for (k = m; k < m + 30; k++) { + if (p == gpl.data[k] && + c == gpl.data[k + 1]) { + j = k + 2; + break; + } + } + if (j == gpl.length) { + j = 1; + } + } + + comp_size = attempt_round_trip(mem_ctx, original, "/tmp/gpl", ref); + assert_true(comp_size > 0); + assert_true(comp_size < original.length); + + talloc_free(mem_ctx); +} + + +static void 
test_lzxpress_huffman_long_random_graph_round_trip(void **state) +{ + size_t i; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024); + DATA_BLOB ref = {0}; + /* + * There's a random trigram graph, with each pair of sequential bytes + * pointing to a successor. This would probably fall into a fairly + * simple loop, but we introduce damage into the system, randomly + * flipping about 1 bit in 64. + * + * The result is semi-structured and compressible. + */ + uint8_t *d = original.data; + uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536); + uint32_t *table32 = (void*)table; + ssize_t comp_size; + struct jsf_rng rng; + + jsf32_init(&rng, 1); + for (i = 0; i < (65536 / 4); i++) { + table32[i] = jsf32(&rng); + } + + d[0] = 'a'; + d[1] = 'b'; + + for (i = 2; i < original.length; i++) { + uint16_t k = (d[i - 2] << 8) | d[i - 1]; + uint32_t damage = jsf32(&rng) & jsf32(&rng) & jsf32(&rng); + damage &= (damage >> 16); + k ^= damage & 0xffff; + d[i] = table[k]; + } + + comp_size = attempt_round_trip(mem_ctx, original, "/tmp/random-graph", ref); + assert_true(comp_size > 0); + assert_true(comp_size < original.length); + + talloc_free(mem_ctx); +} + + +static void test_lzxpress_huffman_chaos_graph_round_trip(void **state) +{ + size_t i; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024); + DATA_BLOB ref = {0}; + /* + * There's a random trigram graph, with each pair of sequential bytes + * pointing to a successor. This would probably fall into a fairly + * simple loop, but we keep changing the graph. The result is long + * periods of stability separatd by bursts of noise. 
+ */ + uint8_t *d = original.data; + uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536); + uint32_t *table32 = (void*)table; + ssize_t comp_size; + struct jsf_rng rng; + + jsf32_init(&rng, 1); + for (i = 0; i < (65536 / 4); i++) { + table32[i] = jsf32(&rng); + } + + d[0] = 'a'; + d[1] = 'b'; + + for (i = 2; i < original.length; i++) { + uint16_t k = (d[i - 2] << 8) | d[i - 1]; + uint32_t damage = jsf32(&rng); + d[i] = table[k]; + if ((damage >> 29) == 0) { + uint16_t index = damage & 0xffff; + uint8_t value = (damage >> 16) & 0xff; + table[index] = value; + } + } + + comp_size = attempt_round_trip(mem_ctx, original, "/tmp/chaos-graph", ref); + assert_true(comp_size > 0); + assert_true(comp_size < original.length); + + talloc_free(mem_ctx); +} + + +static void test_lzxpress_huffman_sparse_random_graph_round_trip(void **state) +{ + size_t i; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024); + DATA_BLOB ref = {0}; + /* + * There's a random trigram graph, with each pair of sequential bytes + * pointing to a successor. This will fall into a fairly simple loops, + * but we introduce damage into the system, randomly mangling about 1 + * byte in 65536. + * + * The result has very long repetitive runs, which should lead to + * oversized blocks. 
+ */ + uint8_t *d = original.data; + uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536); + uint32_t *table32 = (void*)table; + ssize_t comp_size; + struct jsf_rng rng; + + jsf32_init(&rng, 3); + for (i = 0; i < (65536 / 4); i++) { + table32[i] = jsf32(&rng); + } + + d[0] = 'a'; + d[1] = 'b'; + + for (i = 2; i < original.length; i++) { + uint16_t k = (d[i - 2] << 8) | d[i - 1]; + uint32_t damage = jsf32(&rng); + if ((damage & 0xffff0000) == 0) { + k ^= damage & 0xffff; + } + d[i] = table[k]; + } + + comp_size = attempt_round_trip(mem_ctx, original, "/tmp/sparse-random-graph", ref); + assert_true(comp_size > 0); + assert_true(comp_size < original.length); + + talloc_free(mem_ctx); +} + + +static void test_lzxpress_huffman_random_noise_round_trip(void **state) +{ + size_t i; + size_t len = 1024 * 1024; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, len); + DATA_BLOB ref = {0}; + ssize_t comp_size; + /* + * We are filling this up with incompressible noise, but we can assert + * quite tight bounds on how badly it will fail to compress. + * + * Specifically, with randomly distributed codes, the Huffman table + * should come out as roughly even, averaging 8 bit codes. Then there + * will be a 256 byte table every 64k, which is a 1/256 overhead (i.e. + * the compressed length will be 257/256 the original *on average*). + * We assert it is less than 1 in 200 but more than 1 in 300. 
+ */ + uint32_t *d32 = (uint32_t*)((void*)original.data); + struct jsf_rng rng; + jsf32_init(&rng, 2); + + for (i = 0; i < (len / 4); i++) { + d32[i] = jsf32(&rng); + } + + comp_size = attempt_round_trip(mem_ctx, original, "/tmp/random-noise", ref); + assert_true(comp_size > 0); + assert_true(comp_size > original.length + original.length / 300); + assert_true(comp_size < original.length + original.length / 200); + debug_message("original size %zu; compressed size %zd; ratio %.3f\n", + len, comp_size, ((double)comp_size) / len); + + talloc_free(mem_ctx); +} + + +static void test_lzxpress_huffman_overlong_matches(void **state) +{ + size_t i, j = 0; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 1024 * 1024); + DATA_BLOB ref = {0}; + uint8_t *d = original.data; + char filename[300]; + /* + * We are testing with something like "aaaaaaaaaaaaaaaaaaaaaaabbbbb" + * where typically the number of "a"s is > 65536, and the number of + * "b"s is < 42. 
+ */ + ssize_t na[] = {65535, 65536, 65537, 65559, 65575, 200000, -1}; + ssize_t nb[] = {1, 2, 20, 39, 40, 41, 42, -1}; + int score = 0; + ssize_t comp_size; + + for (i = 0; na[i] >= 0; i++) { + ssize_t a = na[i]; + memset(d, 'a', a); + for (j = 0; nb[j] >= 0; j++) { + ssize_t b = nb[j]; + memset(d + a, 'b', b); + original.length = a + b; + snprintf(filename, sizeof(filename), + "/tmp/overlong-%zd-%zd", a, b); + comp_size = attempt_round_trip(mem_ctx, + original, + filename, ref); + if (comp_size > 0) { + score++; + } + } + } + debug_message("%d/%zu correct\n", score, i * j); + assert_int_equal(score, i * j); + talloc_free(mem_ctx); +} + + +static void test_lzxpress_huffman_overlong_matches_abc(void **state) +{ + size_t i, j = 0, k = 0; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 1024 * 1024); + DATA_BLOB ref = {0}; + uint8_t *d = original.data; + char filename[300]; + /* + * We are testing with something like "aaaabbbbcc" where typically + * the number of "a"s + "b"s is around 65536, and the number of "c"s + * is < 43. 
+ */ + ssize_t nab[] = {1, 21, 32767, 32768, 32769, -1}; + ssize_t nc[] = {1, 2, 20, 39, 40, 41, 42, -1}; + int score = 0; + ssize_t comp_size; + + for (i = 0; nab[i] >= 0; i++) { + ssize_t a = nab[i]; + memset(d, 'a', a); + for (j = 0; nab[j] >= 0; j++) { + ssize_t b = nab[j]; + memset(d + a, 'b', b); + for (k = 0; nc[k] >= 0; k++) { + ssize_t c = nc[k]; + memset(d + a + b, 'c', c); + original.length = a + b + c; + snprintf(filename, sizeof(filename), + "/tmp/overlong-abc-%zd-%zd-%zd", + a, b, c); + comp_size = attempt_round_trip(mem_ctx, + original, + filename, ref); + if (comp_size > 0) { + score++; + } + } + } + } + debug_message("%d/%zu correct\n", score, i * j * k); + assert_int_equal(score, i * j * k); + talloc_free(mem_ctx); +} + + +static void test_lzxpress_huffman_extremely_compressible_middle(void **state) +{ + size_t len = 192 * 1024; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, len); + DATA_BLOB ref = {0}; + ssize_t comp_size; + /* + * When a middle block (i.e. not the first and not the last of >= 3), + * can be entirely expressed as a match starting in the previous + * block, the Huffman tree would end up with 1 element, which does not + * work for the code construction. It really wants to use both bits. + * So we need to ensure we have some way of dealing with this. 
+ */ + memset(original.data, 'a', 0x10000 - 1); + memset(original.data + 0x10000 - 1, 'b', 0x10000 + 1); + memset(original.data + 0x20000, 'a', 0x10000); + comp_size = attempt_round_trip(mem_ctx, original, "/tmp/compressible-middle", ref); + assert_true(comp_size > 0); + assert_true(comp_size < 1024); + debug_message("original size %zu; compressed size %zd; ratio %.3f\n", + len, comp_size, ((double)comp_size) / len); + + talloc_free(mem_ctx); +} + + +static void test_lzxpress_huffman_max_length_limit(void **state) +{ + size_t len = 65 * 1024 * 1024; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB original = data_blob_talloc_zero(mem_ctx, len); + DATA_BLOB ref = {0}; + ssize_t comp_size; + /* + * Reputedly Windows has a 64MB limit in the maximum match length it + * will encode. We follow this, and test that here with nearly 65 MB + * of zeros between two letters; this should be encoded in three + * blocks: + * + * 1. 'a', 64M × '\0' + * 2. (1M - 2) × '\0' -- finishing off what would have been the same match + * 3. 'b' EOF + * + * Which we can assert by saying the length is > 768, < 1024. + */ + original.data[0] = 'a'; + original.data[len - 1] = 'b'; + comp_size = attempt_round_trip(mem_ctx, original, "/tmp/max-length-limit", ref); + assert_true(comp_size > 0x300); + assert_true(comp_size < 0x400); + debug_message("original size %zu; compressed size %zd; ratio %.3f\n", + len, comp_size, ((double)comp_size) / len); + + talloc_free(mem_ctx); +} + + +static void test_lzxpress_huffman_short_boring_strings(void **state) +{ + size_t len = 64 * 1024; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, len); + DATA_BLOB ref = {0}; + ssize_t comp_size; + ssize_t lengths[] = { + 1, 2, 20, 39, 40, 41, 42, 256, 270, 273, 274, 1000, 64000, -1}; + char filename[300]; + size_t i; + /* + * How do short repetitive strings work? We're poking at the limit + * around which LZ77 comprssion is turned on. 
+ * + * For this test we don't change the blob memory between runs, just + * the declared length. + */ + memset(original.data, 'a', len); + for (i = 0; lengths[i] >= 0; i++) { + original.length = lengths[i]; + snprintf(filename, sizeof(filename), + "/tmp/short-boring-%zu", + original.length); + comp_size = attempt_round_trip(mem_ctx, original, filename, ref); + if (original.length < 41) { + assert_true(comp_size > 256 + original.length / 8); + } else if (original.length < 274) { + assert_true(comp_size == 261); + } else { + assert_true(comp_size == 263); + } + assert_true(comp_size < 261 + original.length / 8); + } + /* let's just show we didn't change the original */ + for (i = 0; i < len; i++) { + if (original.data[i] != 'a') { + fail_msg("input data[%zu] was changed! (%2x, expected %2x)\n", + i, original.data[i], 'a'); + } + } + + talloc_free(mem_ctx); +} + + +static void test_lzxpress_huffman_compress_empty_or_null(void **state) +{ + /* + * We expect these to fail with a -1, except the last one, which does + * the real thing. 
+ */ + ssize_t ret; + const uint8_t *input = bidirectional_pairs[0].decompressed.data; + size_t ilen = bidirectional_pairs[0].decompressed.length; + size_t olen = bidirectional_pairs[0].compressed.length; + uint8_t output[olen]; + struct lzxhuff_compressor_mem cmp_mem; + + ret = lzxpress_huffman_compress(&cmp_mem, input, 0, output, olen); + assert_int_equal(ret, -1LL); + ret = lzxpress_huffman_compress(&cmp_mem, input, ilen, output, 0); + assert_int_equal(ret, -1LL); + + ret = lzxpress_huffman_compress(&cmp_mem, NULL, ilen, output, olen); + assert_int_equal(ret, -1LL); + ret = lzxpress_huffman_compress(&cmp_mem, input, ilen, NULL, olen); + assert_int_equal(ret, -1LL); + ret = lzxpress_huffman_compress(NULL, input, ilen, output, olen); + assert_int_equal(ret, -1LL); + + ret = lzxpress_huffman_compress(&cmp_mem, input, ilen, output, olen); + assert_int_equal(ret, olen); +} + + +static void test_lzxpress_huffman_decompress_empty_or_null(void **state) +{ + /* + * We expect these to fail with a -1, except the last one. 
+ */ + ssize_t ret; + const uint8_t *input = bidirectional_pairs[0].compressed.data; + size_t ilen = bidirectional_pairs[0].compressed.length; + size_t olen = bidirectional_pairs[0].decompressed.length; + uint8_t output[olen]; + + ret = lzxpress_huffman_decompress(input, 0, output, olen); + assert_int_equal(ret, -1LL); + ret = lzxpress_huffman_decompress(input, ilen, output, 0); + assert_int_equal(ret, -1LL); + + ret = lzxpress_huffman_decompress(NULL, ilen, output, olen); + assert_int_equal(ret, -1LL); + ret = lzxpress_huffman_decompress(input, ilen, NULL, olen); + assert_int_equal(ret, -1LL); + + ret = lzxpress_huffman_decompress(input, ilen, output, olen); + assert_int_equal(ret, olen); +} + + +int main(void) { + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_lzxpress_huffman_short_boring_strings), + cmocka_unit_test(test_lzxpress_huffman_max_length_limit), + cmocka_unit_test(test_lzxpress_huffman_extremely_compressible_middle), + cmocka_unit_test(test_lzxpress_huffman_long_random_graph_round_trip), + cmocka_unit_test(test_lzxpress_huffman_chaos_graph_round_trip), + cmocka_unit_test(test_lzxpress_huffman_sparse_random_graph_round_trip), + cmocka_unit_test(test_lzxpress_huffman_round_trip), + cmocka_unit_test(test_lzxpress_huffman_decompress_files), + cmocka_unit_test(test_lzxpress_huffman_decompress_more_compressed_files), + cmocka_unit_test(test_lzxpress_huffman_compress), + cmocka_unit_test(test_lzxpress_huffman_decompress), + cmocka_unit_test(test_lzxpress_huffman_long_gpl_round_trip), + cmocka_unit_test(test_lzxpress_huffman_long_random_graph_round_trip), + cmocka_unit_test(test_lzxpress_huffman_random_noise_round_trip), + cmocka_unit_test(test_lzxpress_huffman_overlong_matches_abc), + cmocka_unit_test(test_lzxpress_huffman_overlong_matches), + cmocka_unit_test(test_lzxpress_huffman_decompress_empty_or_null), + cmocka_unit_test(test_lzxpress_huffman_compress_empty_or_null), + }; + if (!isatty(1)) { + cmocka_set_message_output(CM_OUTPUT_SUBUNIT); 
+ } + + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/lib/compression/tests/test_lzxpress_plain.c b/lib/compression/tests/test_lzxpress_plain.c new file mode 100644 index 0000000..1c14793 --- /dev/null +++ b/lib/compression/tests/test_lzxpress_plain.c @@ -0,0 +1,1194 @@ +/* + Unix SMB/CIFS implementation. + test suite for the compression functions + + Copyright (C) Jelmer Vernooij 2007 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include +#include +#include "includes.h" +#include "talloc.h" +#include "lzxpress.h" +#include "lib/util/base64.h" + + +/* set LZX_DEBUG_FILES to true to save round-trip files in /tmp. */ +#define LZX_DEBUG_FILES false + +/* set LZX_DEBUG_VERBOSE to true to print more. */ +#define LZX_DEBUG_VERBOSE false + + +#if LZX_DEBUG_VERBOSE +#define debug_message(...) 
print_message(__VA_ARGS__) + +#include + +struct timespec start = {0}; +struct timespec end = {0}; +static void debug_start_timer(void) +{ + clock_gettime(CLOCK_MONOTONIC, &start); +} + +static void debug_end_timer(const char *name, size_t len) +{ + uint64_t ns; + double secs; + double rate; + clock_gettime(CLOCK_MONOTONIC, &end); + ns = end.tv_nsec; + ns += end.tv_sec * 1000 * 1000 * 1000; + ns -= start.tv_nsec; + ns -= start.tv_sec * 1000 * 1000 * 1000; + secs = ns / 1e9; + rate = len / (secs * 1024 * 1024); + debug_message("%s %zu bytes in %.2g: \033[1;35m%.2f\033[0m MB per second\n", + name, len, secs, rate); +} + +#else +#define debug_message(...) /* debug_message */ +#define debug_start_timer(...) /* debug_start_timer */ +#define debug_end_timer(...) /* debug_end_timer */ +#endif + + +struct lzx_pair { + const char *name; + DATA_BLOB compressed; + DATA_BLOB decompressed; +}; + +struct lzx_file_pair { + const char *name; + const char *compressed_file; + const char *decompressed_file; +}; + + +#define DECOMP_DIR "testdata/compression/decompressed" +#define COMP_DIR "testdata/compression/compressed-plain" +#define MORE_COMP_DIR "testdata/compression/compressed-more-plain" + + +#define BLOB_FROM_ARRAY(...) 
\ + { \ + .data = (uint8_t[]){__VA_ARGS__}, \ + .length = sizeof((uint8_t[]){__VA_ARGS__}) \ + } + +#define BLOB_FROM_STRING(s) \ + { \ + .data = discard_const_p(uint8_t, s), \ + .length = (sizeof(s) - 1) \ + } + + +const char * file_names[] = { + "generate-windows-test-vectors.c", + "fib_shuffle-128k+", + "fuzzing-0fc2d461b56cd8103c91", + "fuzzing-3ec3bca27bb9eb40c128", + "fuzzing-a3115a81d1ac500318f9", + "fuzzing-3591f9dc02bb00a54b60", + "27826-8.txt", + "5d049b4cb1bd933f5e8ex19", + "638e61e96d54279981c3x5", + "64k-minus-one-zeros", + "64k-plus-one-zeros", + "64k-zeros", + "96f696a4e5ce56c61a3dx10", + "9e0b6a12febf38e98f13", + "abc-times-101", + "abc-times-105", + "abc-times-200", + "b63289ccc7f218c0d56b", + "beta-variate1-128k+", + "beta-variate3-128k+", + "decayed_alphabet_128k+", + "decayed_alphabet_64k", + "f00842317dc6d5695b02", + "fib_shuffle", + "midsummer-nights-dream.txt", + "notes-on-the-underground.txt", + "pg22009.txt", + "repeating", + "repeating-exactly-64k", + "setup.log", + "slow-015ddc36a71412ccc50d", + "slow-100e9f966a7feb9ca40a", + "slow-2a671c3cff4f1574cbab", + "slow-33d90a24e70515b14cd0", + "slow-49d8c05261e3f412fc72", + "slow-50a249d2fe56873e56a0", + "slow-63e9f0b52235fb0129fa", + "slow-73b7f971d65908ac0095", + "slow-8b61e3dd267908544531", + "slow-9d1c5a079b0462986f1f", + "slow-aa7262a821dabdcf04a6", + "slow-b8a91d142b0d2af7f5ca", + "slow-c79142457734bbc8d575", + "slow-d736544545b90d83fe75", + "slow-e3b9bdfaed7d1a606fdb", + "slow-f3f1c02a9d006e5e1703", + "trigram_128k+", + "trigram_64k", + "trigram_sum_128k+", + "trigram_sum_64k", + NULL +}; + + + +static DATA_BLOB datablob_from_file(TALLOC_CTX *mem_ctx, + const char *filename) +{ + DATA_BLOB b = {0}; + FILE *fh = fopen(filename, "rb"); + int ret; + struct stat s; + size_t len; + if (fh == NULL) { + debug_message("could not open '%s'\n", filename); + return b; + } + ret = fstat(fileno(fh), &s); + if (ret != 0) { + fclose(fh); + return b; + } + b.data = talloc_array(mem_ctx, uint8_t, 
s.st_size); + if (b.data == NULL) { + fclose(fh); + return b; + } + len = fread(b.data, 1, s.st_size, fh); + if (ferror(fh) || len != s.st_size) { + TALLOC_FREE(b.data); + } else { + b.length = len; + } + fclose(fh); + return b; +} + + + +static void test_lzxpress_plain_decompress_files(void **state) +{ + size_t i; + int score = 0; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + for (i = 0; file_names[i] != NULL; i++) { + char filename[200]; + uint8_t *dest = NULL; + ssize_t written; + TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx); + struct lzx_pair p = { + .name = file_names[i] + }; + + debug_message("%s\n", p.name); + + snprintf(filename, sizeof(filename), + "%s/%s.decomp", DECOMP_DIR, p.name); + + p.decompressed = datablob_from_file(tmp_ctx, filename); + assert_non_null(p.decompressed.data); + + snprintf(filename, sizeof(filename), + "%s/%s.lzplain", COMP_DIR, p.name); + + p.compressed = datablob_from_file(tmp_ctx, filename); + assert_non_null(p.compressed.data); + + dest = talloc_array(tmp_ctx, uint8_t, p.decompressed.length); + debug_start_timer(); + written = lzxpress_decompress(p.compressed.data, + p.compressed.length, + dest, + p.decompressed.length); + debug_end_timer("decompress", p.decompressed.length); + if (written == p.decompressed.length && + memcmp(dest, p.decompressed.data, p.decompressed.length) == 0) { + debug_message("\033[1;32mdecompressed %s!\033[0m\n", p.name); + score++; + } else { + debug_message("\033[1;31mfailed to decompress %s!\033[0m\n", + p.name); + debug_message("size %zd vs reference %zu\n", + written, p.decompressed.length); + } + talloc_free(tmp_ctx); + } + debug_message("%d/%zu correct\n", score, i); + assert_int_equal(score, i); +} + + +static void test_lzxpress_plain_decompress_more_compressed_files(void **state) +{ + /* + * This tests the decompression of files that have been compressed on + * Windows with the level turned up (to 1, default for MS-XCA is 0). 
+ * + * The format is identical, but it will have tried harder to find + * matches. + */ + size_t i; + int score = 0; + int found = 0; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + for (i = 0; file_names[i] != NULL; i++) { + char filename[200]; + uint8_t *dest = NULL; + ssize_t written; + TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx); + struct lzx_pair p = { + .name = file_names[i] + }; + + debug_message("%s\n", p.name); + + snprintf(filename, sizeof(filename), + "%s/%s.decomp", DECOMP_DIR, p.name); + + p.decompressed = datablob_from_file(tmp_ctx, filename); + assert_non_null(p.decompressed.data); + + snprintf(filename, sizeof(filename), + "%s/%s.lzplain", MORE_COMP_DIR, p.name); + + p.compressed = datablob_from_file(tmp_ctx, filename); + if (p.compressed.data == NULL) { + /* + * We don't have all the vectors in the + * more-compressed directory, which is OK, we skip + * them. + */ + continue; + } + found++; + dest = talloc_array(tmp_ctx, uint8_t, p.decompressed.length); + debug_start_timer(); + written = lzxpress_decompress(p.compressed.data, + p.compressed.length, + dest, + p.decompressed.length); + debug_end_timer("decompress", p.decompressed.length); + if (written != -1 && + written == p.decompressed.length && + memcmp(dest, p.decompressed.data, p.decompressed.length) == 0) { + debug_message("\033[1;32mdecompressed %s!\033[0m\n", p.name); + score++; + } else { + debug_message("\033[1;31mfailed to decompress %s!\033[0m\n", + p.name); + debug_message("size %zd vs reference %zu\n", + written, p.decompressed.length); + } + talloc_free(tmp_ctx); + } + debug_message("%d/%d correct\n", score, found); + assert_int_equal(score, found); +} + + +/* + * attempt_round_trip() tests whether a data blob can survive a compression + * and decompression cycle. If save_name is not NULL and LZX_DEBUG_FILES + * evals to true, the various stages are saved in files with that name and the + * '-original', '-compressed', and '-decompressed' suffixes. 
If ref_compressed + * has data, it'll print a message saying whether the compressed data matches + * that. + */ + +static ssize_t attempt_round_trip(TALLOC_CTX *mem_ctx, + DATA_BLOB original, + const char *save_name, + DATA_BLOB ref_compressed) +{ + TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx); + DATA_BLOB compressed = data_blob_talloc(tmp_ctx, NULL, + original.length * 8 / 7 + 8); + DATA_BLOB decompressed = data_blob_talloc(tmp_ctx, NULL, + original.length); + ssize_t comp_written, decomp_written; + debug_start_timer(); + comp_written = lzxpress_compress(original.data, + original.length, + compressed.data, + compressed.length); + debug_end_timer("compress", original.length); + if (comp_written <= 0) { + talloc_free(tmp_ctx); + return -1; + } + + if (ref_compressed.data != NULL) { + /* + * This is informational, not an assertion; there are + * ~infinite legitimate ways to compress the data, many as + * good as each other (think of compression as a language, not + * a format). + */ + debug_message("compressed size %zd vs reference %zu\n", + comp_written, ref_compressed.length); + + if (comp_written == compressed.length && + memcmp(compressed.data, ref_compressed.data, comp_written) == 0) { + debug_message("\033[1;32mbyte identical!\033[0m\n"); + } + } + debug_start_timer(); + decomp_written = lzxpress_decompress(compressed.data, + comp_written, + decompressed.data, + decompressed.length); + debug_end_timer("decompress", original.length); + if (save_name != NULL && LZX_DEBUG_FILES) { + char s[300]; + FILE *fh = NULL; + + snprintf(s, sizeof(s), "%s-original", save_name); + fprintf(stderr, "Saving %zu bytes to %s\n", original.length, s); + fh = fopen(s, "w"); + fwrite(original.data, 1, original.length, fh); + fclose(fh); + + snprintf(s, sizeof(s), "%s-compressed", save_name); + fprintf(stderr, "Saving %zu bytes to %s\n", comp_written, s); + fh = fopen(s, "w"); + fwrite(compressed.data, 1, comp_written, fh); + fclose(fh); + /* + * We save the decompressed file using 
original.length, not + * the returned size. If these differ, the returned size will + * be -1. By saving the whole buffer we can see at what point + * it went haywire. + */ + snprintf(s, sizeof(s), "%s-decompressed", save_name); + fprintf(stderr, "Saving %zu bytes to %s\n", original.length, s); + fh = fopen(s, "w"); + fwrite(decompressed.data, 1, original.length, fh); + fclose(fh); + } + + if (original.length != decomp_written || + memcmp(decompressed.data, + original.data, + original.length) != 0) { + debug_message("\033[1;31mgot %zd, expected %zu\033[0m\n", + decomp_written, + original.length); + talloc_free(tmp_ctx); + return -1; + } + talloc_free(tmp_ctx); + return comp_written; +} + + +static void test_lzxpress_plain_round_trip_files(void **state) +{ + size_t i; + int score = 0; + ssize_t compressed_total = 0; + ssize_t reference_total = 0; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + for (i = 0; file_names[i] != NULL; i++) { + char filename[200]; + char *debug_files = NULL; + TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx); + ssize_t comp_size; + struct lzx_pair p = { + .name = file_names[i] + }; + debug_message("-------------------\n"); + debug_message("%s\n", p.name); + + snprintf(filename, sizeof(filename), + "%s/%s.decomp", DECOMP_DIR, p.name); + + p.decompressed = datablob_from_file(tmp_ctx, filename); + assert_non_null(p.decompressed.data); + + snprintf(filename, sizeof(filename), + "%s/%s.lzplain", COMP_DIR, p.name); + + p.compressed = datablob_from_file(tmp_ctx, filename); + if (p.compressed.data == NULL) { + debug_message( + "Could not load %s reference file %s\n", + p.name, filename); + debug_message("%s decompressed %zu\n", p.name, + p.decompressed.length); + } else { + debug_message("%s: reference compressed %zu decomp %zu\n", + p.name, + p.compressed.length, + p.decompressed.length); + } + if (1) { + /* + * We're going to save copies in /tmp. 
+ */ + snprintf(filename, sizeof(filename), + "/tmp/lzxplain-%s", p.name); + debug_files = filename; + } + + comp_size = attempt_round_trip(mem_ctx, p.decompressed, + debug_files, + p.compressed); + if (comp_size > 0) { + debug_message("\033[1;32mround trip!\033[0m\n"); + score++; + if (p.compressed.length) { + compressed_total += comp_size; + reference_total += p.compressed.length; + } + } + talloc_free(tmp_ctx); + } + debug_message("%d/%zu correct\n", score, i); + print_message("\033[1;34mtotal compressed size: %zu\033[0m\n", + compressed_total); + print_message("total reference size: %zd \n", reference_total); + print_message("diff: %7zd \n", + reference_total - compressed_total); + assert_true(reference_total != 0); + print_message("ratio: \033[1;3%dm%.2f\033[0m \n", + 2 + (compressed_total >= reference_total), + ((double)compressed_total) / reference_total); + /* + * Assert that the compression is better than Windows. Unlike the + * Huffman variant, where things are very even, here we do much better + * than Windows without especially trying. + */ + assert_true(compressed_total <= reference_total); + + assert_int_equal(score, i); + talloc_free(mem_ctx); +} + + +/* + * Bob Jenkins' Small Fast RNG. + * + * We don't need it to be this good, but we do need it to be reproduceable + * across platforms, which rand() etc aren't. 
+ * + * http://burtleburtle.net/bob/rand/smallprng.html + */ + +struct jsf_rng { + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; +}; + +#define ROTATE32(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) + +static uint32_t jsf32(struct jsf_rng *x) { + uint32_t e = x->a - ROTATE32(x->b, 27); + x->a = x->b ^ ROTATE32(x->c, 17); + x->b = x->c + x->d; + x->c = x->d + e; + x->d = e + x->a; + return x->d; +} + +static void jsf32_init(struct jsf_rng *x, uint32_t seed) { + size_t i; + x->a = 0xf1ea5eed; + x->b = x->c = x->d = seed; + for (i = 0; i < 20; ++i) { + jsf32(x); + } +} + + +static void test_lzxpress_plain_long_gpl_round_trip(void **state) +{ + /* + * We use a kind of model-free Markov model to generate a massively + * extended pastiche of the GPLv3 (chosen because it is right there in + * "COPYING" and won't change often). + * + * The point is to check a round trip of a very long message with + * multiple repetitions on many scales, without having to add a very + * large file. + */ + size_t i, j, k; + uint8_t c; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB gpl = datablob_from_file(mem_ctx, "COPYING"); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024); + DATA_BLOB ref = {0}; + ssize_t comp_size; + struct jsf_rng rng; + + + jsf32_init(&rng, 1); + + j = 1; + original.data[0] = gpl.data[0]; + for (i = 1; i < original.length; i++) { + size_t m; + char p = original.data[i - 1]; + c = gpl.data[j]; + original.data[i] = c; + j++; + m = (j + jsf32(&rng)) % (gpl.length - 50); + for (k = m; k < m + 30; k++) { + if (p == gpl.data[k] && + c == gpl.data[k + 1]) { + j = k + 2; + break; + } + } + if (j == gpl.length) { + j = 1; + } + } + + comp_size = attempt_round_trip(mem_ctx, original, "/tmp/gpl", ref); + assert_true(comp_size > 0); + assert_true(comp_size < original.length); + + talloc_free(mem_ctx); +} + + +static void test_lzxpress_plain_long_random_graph_round_trip(void **state) +{ + size_t i; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + 
DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024); + DATA_BLOB ref = {0}; + /* + * There's a random trigram graph, with each pair of sequential bytes + * pointing to a successor. This would probably fall into a fairly + * simple loop, but we introduce damage into the system, randomly + * flipping about 1 bit in 64. + * + * The result is semi-structured and compressible. + */ + uint8_t *d = original.data; + uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536); + uint32_t *table32 = (void*)table; + ssize_t comp_size; + struct jsf_rng rng; + + jsf32_init(&rng, 1); + for (i = 0; i < (65536 / 4); i++) { + table32[i] = jsf32(&rng); + } + + d[0] = 'a'; + d[1] = 'b'; + + for (i = 2; i < original.length; i++) { + uint16_t k = (d[i - 2] << 8) | d[i - 1]; + uint32_t damage = jsf32(&rng) & jsf32(&rng) & jsf32(&rng); + damage &= (damage >> 16); + k ^= damage & 0xffff; + d[i] = table[k]; + } + + comp_size = attempt_round_trip(mem_ctx, original, "/tmp/random-graph", ref); + assert_true(comp_size > 0); + assert_true(comp_size < original.length); + + talloc_free(mem_ctx); +} + + +static void test_lzxpress_plain_chaos_graph_round_trip(void **state) +{ + size_t i; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024); + DATA_BLOB ref = {0}; + /* + * There's a random trigram graph, with each pair of sequential bytes + * pointing to a successor. This would probably fall into a fairly + * simple loop, but we keep changing the graph. The result is long + * periods of stability separatd by bursts of noise. 
+ */ + uint8_t *d = original.data; + uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536); + uint32_t *table32 = (void*)table; + ssize_t comp_size; + struct jsf_rng rng; + + jsf32_init(&rng, 1); + for (i = 0; i < (65536 / 4); i++) { + table32[i] = jsf32(&rng); + } + + d[0] = 'a'; + d[1] = 'b'; + + for (i = 2; i < original.length; i++) { + uint16_t k = (d[i - 2] << 8) | d[i - 1]; + uint32_t damage = jsf32(&rng); + d[i] = table[k]; + if ((damage >> 29) == 0) { + uint16_t index = damage & 0xffff; + uint8_t value = (damage >> 16) & 0xff; + table[index] = value; + } + } + + comp_size = attempt_round_trip(mem_ctx, original, "/tmp/chaos-graph", ref); + assert_true(comp_size > 0); + assert_true(comp_size < original.length); + + talloc_free(mem_ctx); +} + + +static void test_lzxpress_plain_sparse_random_graph_round_trip(void **state) +{ + size_t i; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024); + DATA_BLOB ref = {0}; + /* + * There's a random trigram graph, with each pair of sequential bytes + * pointing to a successor. This will fall into a fairly simple loops, + * but we introduce damage into the system, randomly mangling about 1 + * byte in 65536. + * + * The result has very long repetitive runs, which should lead to + * oversized blocks. 
+ */ + uint8_t *d = original.data; + uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536); + uint32_t *table32 = (void*)table; + ssize_t comp_size; + struct jsf_rng rng; + + jsf32_init(&rng, 3); + for (i = 0; i < (65536 / 4); i++) { + table32[i] = jsf32(&rng); + } + + d[0] = 'a'; + d[1] = 'b'; + + for (i = 2; i < original.length; i++) { + uint16_t k = (d[i - 2] << 8) | d[i - 1]; + uint32_t damage = jsf32(&rng); + if ((damage & 0xffff0000) == 0) { + k ^= damage & 0xffff; + } + d[i] = table[k]; + } + + comp_size = attempt_round_trip(mem_ctx, original, "/tmp/sparse-random-graph", ref); + assert_true(comp_size > 0); + assert_true(comp_size < original.length); + + talloc_free(mem_ctx); +} + + +static void test_lzxpress_plain_random_noise_round_trip(void **state) +{ + size_t i; + size_t len = 10 * 1024 * 1024; + TALLOC_CTX *mem_ctx = talloc_new(NULL); + DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, len); + DATA_BLOB ref = {0}; + ssize_t comp_size; + /* + * We are filling this up with incompressible noise, but we can assert + * quite tight bounds on how badly it will fail to compress. + * + * There is one additional bit for each code, which says whether the + * code is a literal byte or a match. If *all* codes are literal + * bytes, the length should be 9/8 the original (with rounding + * issues regarding the indicator bit blocks). + * + * If some matches are found the length will be a bit less. We would + * expect one 3 byte match per 1 << 24 tries, but we try 8192 times + * per position. That means there'll a match 1/2048 of the time at + * best. 255 times out of 256 this will be exactly a 3 byte match, + * encoded as two bytes, so we could get a 1 / 2048 saving on top of + * the 1/8 cost. There'll be a smattering of longer matches too, and + * the potential for complicated maths to account for those, but we'll + * skimp on that by allowing for a 1/1500 saving. 
+ * + * With the hash table, we take a shortcut in the "8192 tries", and + * the size of the table makes a difference in how we perform, with 13 + * bits (8192 slots) naturally being luckier than 12. Ultimately, + * either way, the compressed file is still 12.5% bigger than the + * original. + */ + size_t limit = len * 9 / 8 + 4; + + uint32_t *d32 = (uint32_t*)((void*)original.data); + struct jsf_rng rng; + jsf32_init(&rng, 2); + + for (i = 0; i < (len / 4); i++) { + d32[i] = jsf32(&rng); + } + + comp_size = attempt_round_trip(mem_ctx, original, "/tmp/random-noise", ref); + debug_message("original size %zu; compressed size %zd; ratio %.5f\n", + len, comp_size, ((double)comp_size) / len); + debug_message("expected range %zu - %zu\n", + limit - limit / 1500, limit); + + assert_true(comp_size > 0); + assert_true(comp_size < limit); + assert_true(comp_size >= limit - limit / 1500); + talloc_free(mem_ctx); +} + + +/* Tests based on [MS-XCA] 3.1 Examples */ +static void test_msft_data1(void **state) +{ + TALLOC_CTX *tmp_ctx = talloc_new(NULL); + + const char *fixed_data = "abcdefghijklmnopqrstuvwxyz"; + const uint8_t fixed_out[] = { + 0x3f, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64, + 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, + 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, + 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a }; + + ssize_t c_size; + uint8_t *out, *out2; + + out = talloc_size(tmp_ctx, 2048); + memset(out, 0x42, talloc_get_size(out)); + + c_size = lzxpress_compress((const uint8_t *)fixed_data, + strlen(fixed_data), + out, + talloc_get_size(out)); + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, sizeof(fixed_out)); + assert_memory_equal(out, fixed_out, c_size); + out2 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(out, + sizeof(fixed_out), + out2, + talloc_get_size(out2)); + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, strlen(fixed_data)); + assert_memory_equal(out2, fixed_data, c_size); + + 
talloc_free(tmp_ctx); +} + + +static void test_msft_data2(void **state) +{ + TALLOC_CTX *tmp_ctx = talloc_new(NULL); + + const char *fixed_data = + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc" + "abcabcabcabcabcabcabcabc"; + const uint8_t fixed_out[] = { + 0xff, 0xff, 0xff, 0x1f, 0x61, 0x62, 0x63, 0x17, + 0x00, 0x0f, 0xff, 0x26, 0x01}; + + ssize_t c_size; + uint8_t *out, *out2; + + out = talloc_size(tmp_ctx, 2048); + memset(out, 0x42, talloc_get_size(out)); + + c_size = lzxpress_compress((const uint8_t *)fixed_data, + strlen(fixed_data), + out, + talloc_get_size(out)); + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, sizeof(fixed_out)); + assert_memory_equal(out, fixed_out, c_size); + + out2 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(out, + sizeof(fixed_out), + out2, + talloc_get_size(out2)); + + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, strlen(fixed_data)); + assert_memory_equal(out2, fixed_data, c_size); + + talloc_free(tmp_ctx); +} + +/* + test lzxpress + */ +static void test_lzxpress(void **state) +{ + TALLOC_CTX *tmp_ctx = talloc_new(NULL); + const char *fixed_data = "this is a test. 
and this is a test too"; + const uint8_t fixed_out[] = { + 0xff, 0x21, 0x00, 0x04, 0x74, 0x68, 0x69, 0x73, + 0x20, 0x10, 0x00, 0x61, 0x20, 0x74, 0x65, 0x73, + 0x74, 0x2E, 0x20, 0x61, 0x6E, 0x64, 0x20, 0x9F, + 0x00, 0x04, 0x20, 0x74, 0x6F, 0x6F }; + + const uint8_t fixed_out_old_version[] = { + 0x00, 0x20, 0x00, 0x04, 0x74, 0x68, 0x69, 0x73, + 0x20, 0x10, 0x00, 0x61, 0x20, 0x74, 0x65, 0x73, + 0x74, 0x2E, 0x20, 0x61, 0x6E, 0x64, 0x20, 0x9F, + 0x00, 0x04, 0x20, 0x74, 0x6F, 0x6F, 0x00, 0x00, + 0x00, 0x00 }; + + ssize_t c_size; + uint8_t *out, *out2, *out3; + + out = talloc_size(tmp_ctx, 2048); + memset(out, 0x42, talloc_get_size(out)); + + c_size = lzxpress_compress((const uint8_t *)fixed_data, + strlen(fixed_data), + out, + talloc_get_size(out)); + + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, sizeof(fixed_out)); + assert_memory_equal(out, fixed_out, c_size); + + out2 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(out, + sizeof(fixed_out), + out2, + talloc_get_size(out2)); + + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, strlen(fixed_data)); + assert_memory_equal(out2, fixed_data, c_size); + + out3 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(fixed_out_old_version, + sizeof(fixed_out_old_version), + out3, + talloc_get_size(out3)); + + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, strlen(fixed_data)); + assert_memory_equal(out3, fixed_data, c_size); + + talloc_free(tmp_ctx); +} + +static void test_lzxpress2(void **state) +{ + /* + * Use two matches, separated by a literal, and each with a length + * greater than 10, to test the use of nibble_index. Both length values + * (less ten) should be stored as adjacent nibbles to form the 0x21 + * byte. 
+ */ + + TALLOC_CTX *tmp_ctx = talloc_new(NULL); + const char *fixed_data = "aaaaaaaaaaaabaaaaaaaaaaaa"; + const uint8_t fixed_out[] = { + 0xff, 0xff, 0xff, 0x5f, 0x61, 0x07, 0x00, 0x21, + 0x62, 0x67, 0x00}; + + ssize_t c_size; + uint8_t *out, *out2; + + out = talloc_size(tmp_ctx, 2048); + memset(out, 0x42, talloc_get_size(out)); + + c_size = lzxpress_compress((const uint8_t *)fixed_data, + strlen(fixed_data), + out, + talloc_get_size(out)); + + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, sizeof(fixed_out)); + assert_memory_equal(out, fixed_out, c_size); + + out2 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(out, + sizeof(fixed_out), + out2, + talloc_get_size(out2)); + + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, strlen(fixed_data)); + assert_memory_equal(out2, fixed_data, c_size); + + talloc_free(tmp_ctx); +} + +static void test_lzxpress3(void **state) +{ + /* + * Use a series of 31 literals, followed by a single minimum-length + * match (and a terminating literal), to test setting indic_pos when the + * 32-bit flags value overflows after a match. 
+ */ + + TALLOC_CTX *tmp_ctx = talloc_new(NULL); + const char *fixed_data = "abcdefghijklmnopqrstuvwxyz01234abca"; + const uint8_t fixed_out[] = { + 0x01, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64, + 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, + 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, + 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x30, 0x31, + 0x32, 0x33, 0x34, 0xf0, 0x00, 0xff, 0xff, 0xff, + 0x7f, 0x61}; + + ssize_t c_size; + uint8_t *out, *out2; + + out = talloc_size(tmp_ctx, 2048); + memset(out, 0x42, talloc_get_size(out)); + + c_size = lzxpress_compress((const uint8_t *)fixed_data, + strlen(fixed_data), + out, + talloc_get_size(out)); + + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, sizeof(fixed_out)); + assert_memory_equal(out, fixed_out, c_size); + + out2 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(out, + sizeof(fixed_out), + out2, + talloc_get_size(out2)); + + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, strlen(fixed_data)); + assert_memory_equal(out2, fixed_data, c_size); + + talloc_free(tmp_ctx); +} + +static void test_lzxpress4(void **state) +{ + /* + * Use a series of 31 literals, followed by a single minimum-length + * match, to test that the final set of 32-bit flags is written + * correctly when it is empty. 
+ */ + + TALLOC_CTX *tmp_ctx = talloc_new(NULL); + const char *fixed_data = "abcdefghijklmnopqrstuvwxyz01234abc"; + const uint8_t fixed_out[] = { + 0x01, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64, + 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, + 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, + 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x30, 0x31, + 0x32, 0x33, 0x34, 0xf0, 0x00, 0xff, 0xff, 0xff, + 0xff}; + + ssize_t c_size; + uint8_t *out, *out2; + + out = talloc_size(tmp_ctx, 2048); + memset(out, 0x42, talloc_get_size(out)); + + c_size = lzxpress_compress((const uint8_t *)fixed_data, + strlen(fixed_data), + out, + talloc_get_size(out)); + + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, sizeof(fixed_out)); + assert_memory_equal(out, fixed_out, c_size); + + out2 = talloc_size(tmp_ctx, strlen(fixed_data)); + c_size = lzxpress_decompress(out, + sizeof(fixed_out), + out2, + talloc_get_size(out2)); + + assert_int_not_equal(c_size, -1); + assert_int_equal(c_size, strlen(fixed_data)); + assert_memory_equal(out2, fixed_data, c_size); + + talloc_free(tmp_ctx); +} + + +static void test_lzxpress_many_zeros(void **state) +{ + /* + * Repeated values (zero is convenient but not special) will lead to + * very long substring searches in compression, which can be very slow + * if we're not careful. + * + * This test makes a very loose assertion about how long it should + * take to compress a million zeros. + * + * Wall clock time *should* be < 0.1 seconds with the fix and around a + * minute without it. We try for CLOCK_THREAD_CPUTIME_ID which should + * filter out some noise on the machine, and set the threshold at 5 + * seconds. 
+ */ + + TALLOC_CTX *tmp_ctx = talloc_new(NULL); + const size_t N_ZEROS = 1000000; + const uint8_t *zeros = talloc_zero_size(tmp_ctx, N_ZEROS); + const ssize_t expected_c_size_max = 120; + const ssize_t expected_c_size_min = 93; + ssize_t c_size; + uint8_t *comp, *decomp; + static struct timespec t_start, t_end; + uint64_t elapsed_ns; + + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_start) != 0) { + if (clock_gettime(CUSTOM_CLOCK_MONOTONIC, &t_start) != 0) { + clock_gettime(CLOCK_REALTIME, &t_start); + } + } + + comp = talloc_zero_size(tmp_ctx, 2048); + + c_size = lzxpress_compress(zeros, + N_ZEROS, + comp, + talloc_get_size(comp)); + /* + * Because our compression depends on heuristics, we don't insist on + * an exact size in this case. + */ + + assert_true(c_size <= expected_c_size_max); + assert_true(c_size >= expected_c_size_min); + + decomp = talloc_size(tmp_ctx, N_ZEROS * 2); + c_size = lzxpress_decompress(comp, + c_size, + decomp, + N_ZEROS * 2); + + if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_end) != 0) { + if (clock_gettime(CUSTOM_CLOCK_MONOTONIC, &t_end) != 0) { + clock_gettime(CLOCK_REALTIME, &t_end); + } + } + elapsed_ns = ( + (t_end.tv_sec - t_start.tv_sec) * 1000U * 1000U * 1000U) + + (t_end.tv_nsec - t_start.tv_nsec); + print_message("round-trip time: %"PRIu64" ns\n", elapsed_ns); + assert_true(elapsed_ns < 3 * 1000U * 1000U * 1000U); + assert_memory_equal(decomp, zeros, N_ZEROS); + + talloc_free(tmp_ctx); +} + + +static void test_lzxpress_round_trip(void **state) +{ + /* + * Examples found using via fuzzing. 
+ */ + TALLOC_CTX *tmp_ctx = talloc_new(NULL); + size_t i; + struct b64_pair { + const char *uncompressed; + const char *compressed; + } pairs[] = { + { /* this results in a trailing flags block */ + "AAICAmq/EKdP785YU2Ddh7d4vUtdlQyLeHV09LHpUBw=", + "AAAAAAACAgJqvxCnT+/OWFNg3Ye3eL1LXZUMi3h1dPSx6VAc/////w==", + }, + { /* empty string compresses to empty string */ + "", "" + }, + }; + const size_t alloc_size = 1000; + uint8_t *data = talloc_array(tmp_ctx, uint8_t, alloc_size); + + for (i = 0; i < ARRAY_SIZE(pairs); i++) { + ssize_t len; + DATA_BLOB uncomp = base64_decode_data_blob_talloc( + tmp_ctx, + pairs[i].uncompressed); + DATA_BLOB comp = base64_decode_data_blob_talloc( + tmp_ctx, + pairs[i].compressed); + + len = lzxpress_compress(uncomp.data, + uncomp.length, + data, + alloc_size); + + assert_int_not_equal(len, -1); + assert_int_equal(len, comp.length); + + assert_memory_equal(comp.data, data, len); + + len = lzxpress_decompress(comp.data, + comp.length, + data, + alloc_size); + + assert_int_not_equal(len, -1); + assert_int_equal(len, uncomp.length); + + assert_memory_equal(uncomp.data, data, len); + } + talloc_free(tmp_ctx); +} + + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_lzxpress_plain_decompress_files), + cmocka_unit_test(test_lzxpress_plain_decompress_more_compressed_files), + cmocka_unit_test(test_lzxpress_plain_round_trip_files), + cmocka_unit_test(test_lzxpress_plain_long_gpl_round_trip), + cmocka_unit_test(test_lzxpress_plain_long_random_graph_round_trip), + cmocka_unit_test(test_lzxpress_plain_chaos_graph_round_trip), + cmocka_unit_test(test_lzxpress_plain_sparse_random_graph_round_trip), + cmocka_unit_test(test_lzxpress_plain_long_random_graph_round_trip), + cmocka_unit_test(test_lzxpress_plain_random_noise_round_trip), + cmocka_unit_test(test_lzxpress), + cmocka_unit_test(test_msft_data1), + cmocka_unit_test(test_msft_data2), + cmocka_unit_test(test_lzxpress2), + cmocka_unit_test(test_lzxpress3), + 
cmocka_unit_test(test_lzxpress4), + cmocka_unit_test(test_lzxpress_many_zeros), + cmocka_unit_test(test_lzxpress_round_trip), + }; + if (!isatty(1)) { + cmocka_set_message_output(CM_OUTPUT_SUBUNIT); + } + return cmocka_run_group_tests(tests, NULL, NULL); +} diff --git a/lib/compression/wscript_build b/lib/compression/wscript_build new file mode 100644 index 0000000..61fe4a9 --- /dev/null +++ b/lib/compression/wscript_build @@ -0,0 +1,25 @@ +#!/usr/bin/env python + +bld.SAMBA_SUBSYSTEM('LZXPRESS', + deps='replace talloc stable_sort samba-debug', + source='lzxpress.c lzxpress_huffman.c' + ) + +bld.SAMBA_BINARY('test_lzx_huffman', + source='tests/test_lzx_huffman.c', + deps=('cmocka replace LZXPRESS' + ' samba-util'), + local_include=False, + for_selftest=True) + +bld.SAMBA_BINARY('test_lzxpress_plain', + source='tests/test_lzxpress_plain.c', + deps=('cmocka replace LZXPRESS' + ' samba-util'), + local_include=False, + for_selftest=True) + +bld.SAMBA_PYTHON('pycompression', + 'pycompression.c', + deps='LZXPRESS', + realname='samba/compression.so') -- cgit v1.2.3