summaryrefslogtreecommitdiffstats
path: root/lib/compression/lzxpress.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/compression/lzxpress.c')
-rw-r--r--lib/compression/lzxpress.c507
1 files changed, 507 insertions, 0 deletions
diff --git a/lib/compression/lzxpress.c b/lib/compression/lzxpress.c
new file mode 100644
index 0000000..5e5e5ba
--- /dev/null
+++ b/lib/compression/lzxpress.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (C) Matthieu Suiche 2008
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include "replace.h"
+#include "lzxpress.h"
+#include "../lib/util/byteorder.h"
+
+
/*
 * __CHECK_BYTES(size, index, needed): return -1 from the *enclosing
 * function* unless at least `needed` bytes remain between `index` and
 * `size`.
 *
 * Note the non-local control flow: this expands to `return -1`, so it
 * can only be used in functions returning a signed integer type.  Each
 * argument may be evaluated more than once; pass only simple lvalues or
 * constants.
 *
 * NOTE(review): the macro's own double-underscore name is technically
 * reserved in C but is kept for the existing call sites; the parameters
 * now use non-reserved names.
 */
#define __CHECK_BYTES(cb_size_, cb_index_, cb_needed_) do { \
	if (unlikely((cb_index_) >= (cb_size_))) { \
		return -1; \
	} else { \
		uint32_t cb_avail_ = (cb_size_) - (cb_index_); \
		if (unlikely((cb_needed_) > cb_avail_)) { \
			return -1; \
		} \
	} \
} while(0)
+
+
+/*
+ * LZX_PLAIN_COMP_HASH_BITS determines how big the hash table for finding
+ * matches will be.
+ *
+ * The window in which we look for matches is 8192 bytes. That means with
+ * random data a value of 13 is getting close to no collisions, while a 12
+ * will miss about half the possible matches. With compressible data there
+ * will generally be fewer and less diverse entries, so collisions are rarer.
+ *
+ * In the testsuite, both 12 and 13 give better compression than Windows, but
+ * 12 is faster. 11 does not save time and costs accuracy. Thus we prefer 12.
+ */
#define LZX_PLAIN_COMP_HASH_BITS 12
/*
 * How many consecutive slots of the circular hash table are tried before
 * giving up on a store or a lookup.  A larger value trades speed for a
 * better chance of finding a match; past a point it only gets slower.
 */
#define LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS 5
#define HASH_MASK ((1 << LZX_PLAIN_COMP_HASH_BITS) - 1)

/*
 * Mix three consecutive input bytes into a hash table index in
 * [0, HASH_MASK].  The xor constants, shifts, and the byte difference
 * just spread the values around so that similar triples tend to land in
 * different slots.
 */
static inline uint16_t three_byte_hash(const uint8_t *bytes)
{
	uint16_t x = bytes[0];
	uint16_t y = bytes[1] ^ 0x2e;
	uint16_t z = bytes[2] ^ 0x55;
	uint16_t zx = z - x;
	uint16_t mix = ((x + y) << 8) ^ (zx << 5) ^ (z + y) ^ (0xcab + x);

	return mix & HASH_MASK;
}
+
+
+static inline void store_match(uint32_t *hash_table,
+ uint16_t h,
+ uint32_t offset)
+{
+ int i;
+ uint32_t o = hash_table[h];
+ uint16_t h2;
+ uint16_t worst_h;
+ int worst_score;
+
+ if (o >= offset) {
+ /* there is nothing there yet */
+ hash_table[h] = offset;
+ return;
+ }
+ for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
+ h2 = (h + i) & HASH_MASK;
+ if (hash_table[h2] >= offset) {
+ hash_table[h2] = offset;
+ return;
+ }
+ }
+ /*
+ * There are no slots, but we really want to store this, so we'll kick
+ * out the one with the longest distance.
+ */
+ worst_h = h;
+ worst_score = offset - o;
+ for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
+ int score;
+ h2 = (h + i) & HASH_MASK;
+ o = hash_table[h2];
+ score = offset - o;
+ if (score > worst_score) {
+ worst_score = score;
+ worst_h = h2;
+ }
+ }
+ hash_table[worst_h] = offset;
+}
+
+
+struct match {
+ const uint8_t *there;
+ uint32_t length;
+};
+
+
+static inline struct match lookup_match(uint32_t *hash_table,
+ uint16_t h,
+ const uint8_t *data,
+ uint32_t offset,
+ size_t max_len)
+{
+ int i;
+ uint32_t o;
+ uint16_t h2;
+ size_t len;
+ const uint8_t *there = NULL;
+ const uint8_t *here = data + offset;
+ struct match best = {0};
+
+ for (i = 0; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
+ h2 = (h + i) & HASH_MASK;
+ o = hash_table[h2];
+ if (o >= offset) {
+ /*
+ * Either this is 0xffffffff, or something is really
+ * wrong.
+ *
+ * In setting this, we would never have stepped over
+ * an 0xffffffff, so we won't now.
+ */
+ break;
+ }
+ if (offset - o > 8192) {
+ /* Too far away to use */
+ continue;
+ }
+ there = data + o;
+ /*
+ * When we already have a long match, we can try to avoid
+ * measuring out another long, but shorter match.
+ */
+ if (best.length > 1000 &&
+ there[best.length - 1] != best.there[best.length - 1]) {
+ continue;
+ }
+
+ for (len = 0;
+ len < max_len && here[len] == there[len];
+ len++) {
+ /* counting */
+ }
+ if (len > 2) {
+ if (len > best.length) {
+ best.length = len;
+ best.there = there;
+ }
+ }
+ }
+ return best;
+}
+
/*
 * State for writing the compressed stream, shared between
 * lzxpress_compress() and its helpers.
 */
struct write_context {
	uint8_t *compressed;		/* the output buffer */
	uint32_t compressed_pos;	/* next write position in compressed[] */
	uint32_t max_compressed_size;	/* total size of compressed[] */
	uint32_t indic;			/* indicator bits collected so far
					 * (1 bit per token: 1 = match) */
	uint32_t indic_bit;		/* how many bits of indic are filled */
	uint32_t indic_pos;		/* reserved slot where the current
					 * indicator word will be stored */
	uint32_t nibble_index;		/* position of a length byte whose high
					 * nibble is still free, or 0 */
};
+
+
/*
 * Bounds checks for the compression side.  These expand to `return -1`
 * (via __CHECK_BYTES) and rely on variables with exactly these names
 * being in scope: the helpers take a struct write_context pointer `wc`.
 * CHECK_OUTPUT_BYTES is redefined further down for lzxpress_compress(),
 * which holds its write_context by value.
 */
#define CHECK_INPUT_BYTES(__needed) \
	__CHECK_BYTES(uncompressed_size, uncompressed_pos, __needed)
#define CHECK_OUTPUT_BYTES(__needed) \
	__CHECK_BYTES(wc->max_compressed_size, wc->compressed_pos, __needed)
+
+
+static inline ssize_t push_indicator_bit(struct write_context *wc, uint32_t bit)
+{
+ wc->indic = (wc->indic << 1) | bit;
+ wc->indic_bit += 1;
+
+ if (wc->indic_bit == 32) {
+ PUSH_LE_U32(wc->compressed, wc->indic_pos, wc->indic);
+ wc->indic_bit = 0;
+ CHECK_OUTPUT_BYTES(sizeof(uint32_t));
+ wc->indic_pos = wc->compressed_pos;
+ wc->compressed_pos += sizeof(uint32_t);
+ }
+ return wc->indic_pos;
+}
+
+
/*
 * encode_match(): write one match token into the compressed stream.
 *
 * The base form is 16 bits: the top 13 bits hold (offset - 1), the low
 * 3 bits hold MIN(length - 3, 7).  Longer lengths spill over into, in
 * order: a shared half byte (two consecutive long matches share one
 * length byte via wc->nibble_index), a whole byte, and finally either a
 * uint16 or a zero-uint16-plus-uint32 escape.
 *
 * @wc: compressed stream state.
 * @match: the match to encode (match.length is assumed >= 3 here).
 * @here: the current position in the uncompressed buffer, used to work
 *        out the backward distance to match.there.
 *
 * Returns the result of push_indicator_bit(), or -1 when the offset
 * cannot be represented or the output buffer is too small.
 */
static ssize_t encode_match(struct write_context *wc,
			    struct match match,
			    const uint8_t *here)
{
	/* both fields are stored with an implicit bias */
	uint32_t match_len = match.length - 3;
	uint32_t best_offset = here - match.there - 1;
	uint16_t metadata;

	if (best_offset > 8191) {
		/* does not fit in the 13-bit offset field */
		return -1;
	}

	CHECK_OUTPUT_BYTES(sizeof(uint16_t));
	metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7));
	PUSH_LE_U16(wc->compressed, wc->compressed_pos, metadata);
	wc->compressed_pos += sizeof(uint16_t);

	if (match_len >= 7) {
		match_len -= 7;

		if (wc->nibble_index == 0) {
			/* start a new length byte; the low nibble is ours,
			 * the high nibble is left for the next long match */
			wc->nibble_index = wc->compressed_pos;

			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			wc->compressed[wc->nibble_index] = MIN(match_len, 15);
			wc->compressed_pos += sizeof(uint8_t);
		} else {
			/* use the spare high nibble of an earlier length byte */
			wc->compressed[wc->nibble_index] |= MIN(match_len, 15) << 4;
			wc->nibble_index = 0;
		}

		if (match_len >= 15) {
			match_len -= 15;

			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			wc->compressed[wc->compressed_pos] = MIN(match_len, 255);
			wc->compressed_pos += sizeof(uint8_t);

			if (match_len >= 255) {
				/* Additional match_len */

				/* undo the earlier subtractions: the escape
				 * stores the full (length - 3) value */
				match_len += 7 + 15;

				if (match_len < (1 << 16)) {
					CHECK_OUTPUT_BYTES(sizeof(uint16_t));
					PUSH_LE_U16(wc->compressed, wc->compressed_pos,
						    match_len);
					wc->compressed_pos += sizeof(uint16_t);
				} else {
					/* a zero uint16 marks a following
					 * full uint32 length */
					CHECK_OUTPUT_BYTES(sizeof(uint16_t) +
							   sizeof(uint32_t));
					PUSH_LE_U16(wc->compressed,
						    wc->compressed_pos, 0);
					wc->compressed_pos += sizeof(uint16_t);

					PUSH_LE_U32(wc->compressed,
						    wc->compressed_pos,
						    match_len);
					wc->compressed_pos += sizeof(uint32_t);
				}
			}
		}
	}
	/* flag this position as a match in the indicator bitmap */
	return push_indicator_bit(wc, 1);
}
+
/*
 * From here on, CHECK_OUTPUT_BYTES works on lzxpress_compress()'s local
 * write_context `wc`, which it holds by value (the helpers above use a
 * pointer).
 */
#undef CHECK_OUTPUT_BYTES
#define CHECK_OUTPUT_BYTES(__needed) \
	__CHECK_BYTES(wc.max_compressed_size, wc.compressed_pos, __needed)


/*
 * lzxpress_compress(): compress a buffer using [MS-XCA] plain LZ77.
 *
 * @uncompressed: the data to compress.
 * @uncompressed_size: the number of bytes in @uncompressed.
 * @compressed: the buffer that receives the compressed stream.
 * @max_compressed_size: the size of @compressed.
 *
 * Returns the number of bytes written to @compressed, or -1 when the
 * output buffer is too small (via the CHECK_*_BYTES macros).
 */
ssize_t lzxpress_compress(const uint8_t *uncompressed,
			  uint32_t uncompressed_size,
			  uint8_t *compressed,
			  uint32_t max_compressed_size)
{
	/*
	 * This is the algorithm in [MS-XCA] 2.3 "Plain LZ77 Compression".
	 *
	 * It avoids Huffman encoding by including literal bytes inline when a
	 * match is not found. Every so often it includes a uint32 bit map
	 * flagging which positions contain matches and which contain
	 * literals. The encoding of matches is of variable size, depending on
	 * the match length; they are always at least 16 bits long, and can
	 * implicitly use unused half-bytes from earlier in the stream.
	 */
	ssize_t ret;
	uint32_t uncompressed_pos;
	struct write_context wc = {
		.indic = 0,
		.indic_pos = 0,
		.indic_bit = 0,
		.nibble_index = 0,
		.compressed = compressed,
		.compressed_pos = 0,
		.max_compressed_size = max_compressed_size
	};
	uint32_t hash_table[1 << LZX_PLAIN_COMP_HASH_BITS];
	/* 0xffffffff in every slot marks it as empty */
	memset(hash_table, 0xff, sizeof(hash_table));

	if (!uncompressed_size) {
		return 0;
	}

	uncompressed_pos = 0;
	/* reserve room for the first 32-bit indicator word */
	CHECK_OUTPUT_BYTES(sizeof(uint32_t));
	PUSH_LE_U32(wc.compressed, wc.compressed_pos, 0);
	wc.compressed_pos += sizeof(uint32_t);

	while ((uncompressed_pos < uncompressed_size) &&
	       (wc.compressed_pos < wc.max_compressed_size)) {

		/* maximum len we can encode into metadata */
		const uint32_t max_len = MIN(0xFFFF + 3,
					     uncompressed_size - uncompressed_pos);
		const uint8_t *here = uncompressed + uncompressed_pos;
		uint16_t h;
		struct match match = {0};

		if (max_len >= 3) {
			h = three_byte_hash(here);
			match = lookup_match(hash_table,
					     h,
					     uncompressed,
					     uncompressed_pos,
					     max_len);

			store_match(hash_table, h, uncompressed_pos);
		} else {
			/* fewer than 3 bytes left: matches are impossible */
			match.there = NULL;
			match.length = 0;
		}

		if (match.there == NULL) {
			/*
			 * This is going to be a literal byte, which we flag
			 * by setting a bit in an indicator field somewhere
			 * earlier in the stream.
			 */
			CHECK_INPUT_BYTES(sizeof(uint8_t));
			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			wc.compressed[wc.compressed_pos++] = *here;
			uncompressed_pos++;

			ret = push_indicator_bit(&wc, 0);
			if (ret < 0) {
				return ret;
			}
		} else {
			ret = encode_match(&wc, match, here);
			if (ret < 0) {
				return ret;
			}
			uncompressed_pos += match.length;
		}
	}

	/*
	 * Flush the final, possibly partial, indicator word: shift the
	 * used bits to the top, fill the remainder with 1s, and store the
	 * word in the slot that was reserved for it.
	 */
	if (wc.indic_bit != 0) {
		wc.indic <<= 32 - wc.indic_bit;
	}
	wc.indic |= UINT32_MAX >> wc.indic_bit;
	PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic);

	return wc.compressed_pos;
}
+
/*
 * lzxpress_decompress(): decompress an [MS-XCA] plain LZ77 stream.
 *
 * @input: the compressed data.
 * @input_size: the number of bytes in @input.
 * @output: the buffer receiving the decompressed data.
 * @max_output_size: the size of @output.
 *
 * Returns the number of bytes written to @output, or -1 on malformed or
 * truncated input.  Every read and write is bounds-checked via
 * CHECK_INPUT_BYTES/CHECK_OUTPUT_BYTES (rebound below to this
 * function's locals).
 */
ssize_t lzxpress_decompress(const uint8_t *input,
			    uint32_t input_size,
			    uint8_t *output,
			    uint32_t max_output_size)
{
	/*
	 * This is the algorithm in [MS-XCA] 2.4 "Plain LZ77 Decompression
	 * Algorithm Details".
	 */
	uint32_t output_index, input_index;
	uint32_t indicator, indicator_bit;	/* 32-bit literal/match bitmap,
						 * and bits left in it */
	uint32_t nibble_index;	/* input position of a half-consumed
				 * length byte, or 0 */

	if (input_size == 0) {
		return 0;
	}

	output_index = 0;
	input_index = 0;
	indicator = 0;
	indicator_bit = 0;
	nibble_index = 0;

/* rebind the bounds checks to this function's locals */
#undef CHECK_INPUT_BYTES
#define CHECK_INPUT_BYTES(__needed) \
	__CHECK_BYTES(input_size, input_index, __needed)
#undef CHECK_OUTPUT_BYTES
#define CHECK_OUTPUT_BYTES(__needed) \
	__CHECK_BYTES(max_output_size, output_index, __needed)

	do {
		if (indicator_bit == 0) {
			/* refill the 32-bit indicator bitmap */
			CHECK_INPUT_BYTES(sizeof(uint32_t));
			indicator = PULL_LE_U32(input, input_index);
			input_index += sizeof(uint32_t);
			if (input_index == input_size) {
				/*
				 * The compressor left room for indicator
				 * flags for data that doesn't exist.
				 */
				break;
			}
			indicator_bit = 32;
		}
		indicator_bit--;

		/*
		 * check whether the bit specified by indicator_bit is set or not
		 * set in indicator. For example, if indicator_bit has value 4
		 * check whether the 4th bit of the value in indicator is set
		 */
		if (((indicator >> indicator_bit) & 1) == 0) {
			/* clear bit: a literal byte, copied through as-is */
			CHECK_INPUT_BYTES(sizeof(uint8_t));
			CHECK_OUTPUT_BYTES(sizeof(uint8_t));
			output[output_index] = input[input_index];
			input_index += sizeof(uint8_t);
			output_index += sizeof(uint8_t);
		} else {
			uint32_t length;
			uint32_t offset;

			/* set bit: a match token, at least 16 bits:
			 * 13 bits of (offset - 1), 3 bits of length code */
			CHECK_INPUT_BYTES(sizeof(uint16_t));
			length = PULL_LE_U16(input, input_index);
			input_index += sizeof(uint16_t);
			offset = (length >> 3) + 1;
			length &= 7;

			if (length == 7) {
				/*
				 * Extended length, starting with a half
				 * byte that two consecutive long matches
				 * share (the first one takes the low
				 * nibble).
				 */
				if (nibble_index == 0) {
					CHECK_INPUT_BYTES(sizeof(uint8_t));
					nibble_index = input_index;
					length = input[input_index] & 0xf;
					input_index += sizeof(uint8_t);
				} else {
					length = input[nibble_index] >> 4;
					nibble_index = 0;
				}

				if (length == 15) {
					/* a further whole byte of length */
					CHECK_INPUT_BYTES(sizeof(uint8_t));
					length = input[input_index];
					input_index += sizeof(uint8_t);
					if (length == 255) {
						/* uint16 escape... */
						CHECK_INPUT_BYTES(sizeof(uint16_t));
						length = PULL_LE_U16(input, input_index);
						input_index += sizeof(uint16_t);
						if (length == 0) {
							/* ...or a zero uint16
							 * followed by the full
							 * uint32 length */
							CHECK_INPUT_BYTES(sizeof(uint32_t));
							length = PULL_LE_U32(input, input_index);
							input_index += sizeof(uint32_t);
						}

						if (length < (15 + 7)) {
							/*
							 * An encoder would never
							 * produce this (it would
							 * have used a shorter
							 * form); reject it
							 * rather than underflow
							 * below.
							 */
							return -1;
						}
						length -= (15 + 7);
					}
					length += 15;
				}
				length += 7;
			}
			length += 3;

			if (length == 0) {
				/* the additions above wrapped past 2^32:
				 * corrupt input */
				return -1;
			}

			for (; length > 0; --length) {
				if (offset > output_index) {
					/* the match would start before the
					 * beginning of the output */
					return -1;
				}
				CHECK_OUTPUT_BYTES(sizeof(uint8_t));
				/* byte by byte: source and destination
				 * may overlap (offset can be 1) */
				output[output_index] = output[output_index - offset];
				output_index += sizeof(uint8_t);
			}
		}
	} while ((output_index < max_output_size) && (input_index < (input_size)));

	return output_index;
}