diff options
Diffstat (limited to 'lib/compression/lzxpress.c')
-rw-r--r-- | lib/compression/lzxpress.c | 507 |
1 file changed, 507 insertions, 0 deletions
diff --git a/lib/compression/lzxpress.c b/lib/compression/lzxpress.c new file mode 100644 index 0000000..5e5e5ba --- /dev/null +++ b/lib/compression/lzxpress.c @@ -0,0 +1,507 @@ +/* + * Copyright (C) Matthieu Suiche 2008 + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include "replace.h" +#include "lzxpress.h" +#include "../lib/util/byteorder.h" + + +#define __CHECK_BYTES(__size, __index, __needed) do { \ + if (unlikely(__index >= __size)) { \ + return -1; \ + } else { \ + uint32_t __avail = __size - __index; \ + if (unlikely(__needed > __avail)) { \ + return -1; \ + } \ + } \ +} while(0) + + +/* + * LZX_PLAIN_COMP_HASH_BITS determines how big the hash table for finding + * matches will be. + * + * The window in which we look for matches is 8192 bytes. That means with + * random data a value of 13 is getting close to no collisions, while a 12 + * will miss about half the possible matches. With compressible data there + * will generally be fewer and less diverse entries, so collisions are rarer. + * + * In the testsuite, bith 12 and 13 give better compression than Windows, but + * 12 is faster. 11 does not save time and costs accuracy. Thus we prefer 12. + */ +#define LZX_PLAIN_COMP_HASH_BITS 12 +/* + * LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS is how far ahead to search in the + * circular hash table for a match, before we give up. A bigger number will + * generally lead to better but slower compression, but a stupidly big number + * will just be worse. 
+ */ +#define LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS 5 +#define HASH_MASK ((1 << LZX_PLAIN_COMP_HASH_BITS) - 1) + +static inline uint16_t three_byte_hash(const uint8_t *bytes) +{ + uint16_t a = bytes[0]; + uint16_t b = bytes[1] ^ 0x2e; + uint16_t c = bytes[2] ^ 0x55; + uint16_t ca = c - a; + uint16_t d = ((a + b) << 8) ^ (ca << 5) ^ (c + b) ^ (0xcab + a); + return d & HASH_MASK; +} + + +static inline void store_match(uint32_t *hash_table, + uint16_t h, + uint32_t offset) +{ + int i; + uint32_t o = hash_table[h]; + uint16_t h2; + uint16_t worst_h; + int worst_score; + + if (o >= offset) { + /* there is nothing there yet */ + hash_table[h] = offset; + return; + } + for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) { + h2 = (h + i) & HASH_MASK; + if (hash_table[h2] >= offset) { + hash_table[h2] = offset; + return; + } + } + /* + * There are no slots, but we really want to store this, so we'll kick + * out the one with the longest distance. + */ + worst_h = h; + worst_score = offset - o; + for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) { + int score; + h2 = (h + i) & HASH_MASK; + o = hash_table[h2]; + score = offset - o; + if (score > worst_score) { + worst_score = score; + worst_h = h2; + } + } + hash_table[worst_h] = offset; +} + + +struct match { + const uint8_t *there; + uint32_t length; +}; + + +static inline struct match lookup_match(uint32_t *hash_table, + uint16_t h, + const uint8_t *data, + uint32_t offset, + size_t max_len) +{ + int i; + uint32_t o; + uint16_t h2; + size_t len; + const uint8_t *there = NULL; + const uint8_t *here = data + offset; + struct match best = {0}; + + for (i = 0; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) { + h2 = (h + i) & HASH_MASK; + o = hash_table[h2]; + if (o >= offset) { + /* + * Either this is 0xffffffff, or something is really + * wrong. + * + * In setting this, we would never have stepped over + * an 0xffffffff, so we won't now. 
+ */ + break; + } + if (offset - o > 8192) { + /* Too far away to use */ + continue; + } + there = data + o; + /* + * When we already have a long match, we can try to avoid + * measuring out another long, but shorter match. + */ + if (best.length > 1000 && + there[best.length - 1] != best.there[best.length - 1]) { + continue; + } + + for (len = 0; + len < max_len && here[len] == there[len]; + len++) { + /* counting */ + } + if (len > 2) { + if (len > best.length) { + best.length = len; + best.there = there; + } + } + } + return best; +} + +struct write_context { + uint8_t *compressed; + uint32_t compressed_pos; + uint32_t max_compressed_size; + uint32_t indic; + uint32_t indic_bit; + uint32_t indic_pos; + uint32_t nibble_index; +}; + + +#define CHECK_INPUT_BYTES(__needed) \ + __CHECK_BYTES(uncompressed_size, uncompressed_pos, __needed) +#define CHECK_OUTPUT_BYTES(__needed) \ + __CHECK_BYTES(wc->max_compressed_size, wc->compressed_pos, __needed) + + +static inline ssize_t push_indicator_bit(struct write_context *wc, uint32_t bit) +{ + wc->indic = (wc->indic << 1) | bit; + wc->indic_bit += 1; + + if (wc->indic_bit == 32) { + PUSH_LE_U32(wc->compressed, wc->indic_pos, wc->indic); + wc->indic_bit = 0; + CHECK_OUTPUT_BYTES(sizeof(uint32_t)); + wc->indic_pos = wc->compressed_pos; + wc->compressed_pos += sizeof(uint32_t); + } + return wc->indic_pos; +} + + +static ssize_t encode_match(struct write_context *wc, + struct match match, + const uint8_t *here) +{ + uint32_t match_len = match.length - 3; + uint32_t best_offset = here - match.there - 1; + uint16_t metadata; + + if (best_offset > 8191) { + return -1; + } + + CHECK_OUTPUT_BYTES(sizeof(uint16_t)); + metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7)); + PUSH_LE_U16(wc->compressed, wc->compressed_pos, metadata); + wc->compressed_pos += sizeof(uint16_t); + + if (match_len >= 7) { + match_len -= 7; + + if (wc->nibble_index == 0) { + wc->nibble_index = wc->compressed_pos; + + 
CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + wc->compressed[wc->nibble_index] = MIN(match_len, 15); + wc->compressed_pos += sizeof(uint8_t); + } else { + wc->compressed[wc->nibble_index] |= MIN(match_len, 15) << 4; + wc->nibble_index = 0; + } + + if (match_len >= 15) { + match_len -= 15; + + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + wc->compressed[wc->compressed_pos] = MIN(match_len, 255); + wc->compressed_pos += sizeof(uint8_t); + + if (match_len >= 255) { + /* Additional match_len */ + + match_len += 7 + 15; + + if (match_len < (1 << 16)) { + CHECK_OUTPUT_BYTES(sizeof(uint16_t)); + PUSH_LE_U16(wc->compressed, wc->compressed_pos, + match_len); + wc->compressed_pos += sizeof(uint16_t); + } else { + CHECK_OUTPUT_BYTES(sizeof(uint16_t) + + sizeof(uint32_t)); + PUSH_LE_U16(wc->compressed, + wc->compressed_pos, 0); + wc->compressed_pos += sizeof(uint16_t); + + PUSH_LE_U32(wc->compressed, + wc->compressed_pos, + match_len); + wc->compressed_pos += sizeof(uint32_t); + } + } + } + } + return push_indicator_bit(wc, 1); +} + +#undef CHECK_OUTPUT_BYTES +#define CHECK_OUTPUT_BYTES(__needed) \ + __CHECK_BYTES(wc.max_compressed_size, wc.compressed_pos, __needed) + + +ssize_t lzxpress_compress(const uint8_t *uncompressed, + uint32_t uncompressed_size, + uint8_t *compressed, + uint32_t max_compressed_size) +{ + /* + * This is the algorithm in [MS-XCA] 2.3 "Plain LZ77 Compression". + * + * It avoids Huffman encoding by including literal bytes inline when a + * match is not found. Every so often it includes a uint32 bit map + * flagging which positions contain matches and which contain + * literals. The encoding of matches is of variable size, depending on + * the match length; they are always at least 16 bits long, and can + * implicitly use unused half-bytes from earlier in the stream. 
+ */ + ssize_t ret; + uint32_t uncompressed_pos; + struct write_context wc = { + .indic = 0, + .indic_pos = 0, + .indic_bit = 0, + .nibble_index = 0, + .compressed = compressed, + .compressed_pos = 0, + .max_compressed_size = max_compressed_size + }; + uint32_t hash_table[1 << LZX_PLAIN_COMP_HASH_BITS]; + memset(hash_table, 0xff, sizeof(hash_table)); + + if (!uncompressed_size) { + return 0; + } + + uncompressed_pos = 0; + CHECK_OUTPUT_BYTES(sizeof(uint32_t)); + PUSH_LE_U32(wc.compressed, wc.compressed_pos, 0); + wc.compressed_pos += sizeof(uint32_t); + + while ((uncompressed_pos < uncompressed_size) && + (wc.compressed_pos < wc.max_compressed_size)) { + + /* maximum len we can encode into metadata */ + const uint32_t max_len = MIN(0xFFFF + 3, + uncompressed_size - uncompressed_pos); + const uint8_t *here = uncompressed + uncompressed_pos; + uint16_t h; + struct match match = {0}; + + if (max_len >= 3) { + h = three_byte_hash(here); + match = lookup_match(hash_table, + h, + uncompressed, + uncompressed_pos, + max_len); + + store_match(hash_table, h, uncompressed_pos); + } else { + match.there = NULL; + match.length = 0; + } + + if (match.there == NULL) { + /* + * This is going to be a literal byte, which we flag + * by setting a bit in an indicator field somewhere + * earlier in the stream. 
+ */ + CHECK_INPUT_BYTES(sizeof(uint8_t)); + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + wc.compressed[wc.compressed_pos++] = *here; + uncompressed_pos++; + + ret = push_indicator_bit(&wc, 0); + if (ret < 0) { + return ret; + } + } else { + ret = encode_match(&wc, match, here); + if (ret < 0) { + return ret; + } + uncompressed_pos += match.length; + } + } + + if (wc.indic_bit != 0) { + wc.indic <<= 32 - wc.indic_bit; + } + wc.indic |= UINT32_MAX >> wc.indic_bit; + PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic); + + return wc.compressed_pos; +} + +ssize_t lzxpress_decompress(const uint8_t *input, + uint32_t input_size, + uint8_t *output, + uint32_t max_output_size) +{ + /* + * This is the algorithm in [MS-XCA] 2.4 "Plain LZ77 Decompression + * Algorithm Details". + */ + uint32_t output_index, input_index; + uint32_t indicator, indicator_bit; + uint32_t nibble_index; + + if (input_size == 0) { + return 0; + } + + output_index = 0; + input_index = 0; + indicator = 0; + indicator_bit = 0; + nibble_index = 0; + +#undef CHECK_INPUT_BYTES +#define CHECK_INPUT_BYTES(__needed) \ + __CHECK_BYTES(input_size, input_index, __needed) +#undef CHECK_OUTPUT_BYTES +#define CHECK_OUTPUT_BYTES(__needed) \ + __CHECK_BYTES(max_output_size, output_index, __needed) + + do { + if (indicator_bit == 0) { + CHECK_INPUT_BYTES(sizeof(uint32_t)); + indicator = PULL_LE_U32(input, input_index); + input_index += sizeof(uint32_t); + if (input_index == input_size) { + /* + * The compressor left room for indicator + * flags for data that doesn't exist. + */ + break; + } + indicator_bit = 32; + } + indicator_bit--; + + /* + * check whether the bit specified by indicator_bit is set or not + * set in indicator. 
For example, if indicator_bit has value 4 + * check whether the 4th bit of the value in indicator is set + */ + if (((indicator >> indicator_bit) & 1) == 0) { + CHECK_INPUT_BYTES(sizeof(uint8_t)); + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + output[output_index] = input[input_index]; + input_index += sizeof(uint8_t); + output_index += sizeof(uint8_t); + } else { + uint32_t length; + uint32_t offset; + + CHECK_INPUT_BYTES(sizeof(uint16_t)); + length = PULL_LE_U16(input, input_index); + input_index += sizeof(uint16_t); + offset = (length >> 3) + 1; + length &= 7; + + if (length == 7) { + if (nibble_index == 0) { + CHECK_INPUT_BYTES(sizeof(uint8_t)); + nibble_index = input_index; + length = input[input_index] & 0xf; + input_index += sizeof(uint8_t); + } else { + length = input[nibble_index] >> 4; + nibble_index = 0; + } + + if (length == 15) { + CHECK_INPUT_BYTES(sizeof(uint8_t)); + length = input[input_index]; + input_index += sizeof(uint8_t); + if (length == 255) { + CHECK_INPUT_BYTES(sizeof(uint16_t)); + length = PULL_LE_U16(input, input_index); + input_index += sizeof(uint16_t); + if (length == 0) { + CHECK_INPUT_BYTES(sizeof(uint32_t)); + length = PULL_LE_U32(input, input_index); + input_index += sizeof(uint32_t); + } + + if (length < (15 + 7)) { + return -1; + } + length -= (15 + 7); + } + length += 15; + } + length += 7; + } + length += 3; + + if (length == 0) { + return -1; + } + + for (; length > 0; --length) { + if (offset > output_index) { + return -1; + } + CHECK_OUTPUT_BYTES(sizeof(uint8_t)); + output[output_index] = output[output_index - offset]; + output_index += sizeof(uint8_t); + } + } + } while ((output_index < max_output_size) && (input_index < (input_size))); + + return output_index; +} |