/*
 * Samba compression library - LGPLv3
 *
 * Copyright © Catalyst IT 2022
 *
 * Written by Douglas Bagnall
 * and Joseph Sutton
 *
 * ** NOTE! The following LGPL license applies to this file.
 * ** It does NOT imply that all of Samba is released under the LGPL
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see
 * <https://www.gnu.org/licenses/>.
 */

#include <talloc.h>

#include "replace.h"
#include "lzxpress_huffman.h"
#include "lib/util/stable_sort.h"
#include "lib/util/debug.h"
#include "lib/util/byteorder.h"
#include "lib/util/bytearray.h"

/*
 * DEBUG_NO_LZ77_MATCHES toggles the encoding of matches as matches. If it is
 * true the potential match is written as a series of literals, which is a
 * valid but usually inefficient encoding. This is useful for isolating a
 * problem to either the LZ77 or the Huffman stage.
 */
#ifndef DEBUG_NO_LZ77_MATCHES
#define DEBUG_NO_LZ77_MATCHES false
#endif

/*
 * DEBUG_HUFFMAN_TREE forces the drawing of ascii art huffman trees during
 * compression and decompression.
 *
 * These trees will also be drawn at DEBUG level 10, but that doesn't work
 * with cmocka tests.
 */
#ifndef DEBUG_HUFFMAN_TREE
#define DEBUG_HUFFMAN_TREE false
#endif

#if DEBUG_HUFFMAN_TREE
#define DBG(...) fprintf(stderr, __VA_ARGS__)
#else
#define DBG(...) DBG_INFO(__VA_ARGS__)
#endif

#define LZXPRESS_ERROR -1LL

/*
 * We won't encode a match length longer than MAX_MATCH_LENGTH.
 *
 * Reports are that Windows has a limit at 64M.
 */
#define MAX_MATCH_LENGTH (64 * 1024 * 1024)

struct bitstream {
        const uint8_t *bytes;
        size_t byte_pos;
        size_t byte_size;
        uint32_t bits;
        int remaining_bits;
        uint16_t *table;
};

#if ! defined __has_builtin
#define __has_builtin(x) 0
#endif

/*
 * bitlen_nonzero_16() returns the bit number of the most significant bit, or
 * put another way, the integer log base 2. Log(0) is undefined; the argument
 * has to be non-zero!
 * 1     -> 0
 * 2,3   -> 1
 * 4-7   -> 2
 * 1024  -> 10, etc
 *
 * Probably this is handled by a compiler intrinsic function that maps to a
 * dedicated machine instruction.
 */
static inline int bitlen_nonzero_16(uint16_t x)
{
#if __has_builtin(__builtin_clz)
        /* __builtin_clz returns the number of leading zeros */
        return (sizeof(unsigned int) * CHAR_BIT) - 1
                - __builtin_clz((unsigned int) x);
#else
        int count = -1;
        while (x) {
                x >>= 1;
                count++;
        }
        return count;
#endif
}

struct lzxhuff_compressor_context {
        const uint8_t *input_bytes;
        size_t input_size;
        size_t input_pos;
        size_t prev_block_pos;
        uint8_t *output;
        size_t available_size;
        size_t output_pos;
};

static int compare_huffman_node_count(struct huffman_node *a,
                                      struct huffman_node *b)
{
        return a->count - b->count;
}

static int compare_huffman_node_depth(struct huffman_node *a,
                                      struct huffman_node *b)
{
        int c = a->depth - b->depth;
        if (c != 0) {
                return c;
        }
        return (int)a->symbol - (int)b->symbol;
}

#define HASH_MASK ((1 << LZX_HUFF_COMP_HASH_BITS) - 1)

static inline uint16_t three_byte_hash(const uint8_t *bytes)
{
        /*
         * MS-XCA says "three byte hash", but does not specify it.
         *
         * This one is just cobbled together, but has quite good distribution
         * in the 12-14 bit forms, which is what we care about most.
         * e.g: 13 bit: median 2048, min 2022, max 2074, stddev 6.0
         */
        uint16_t a = bytes[0];
        uint16_t b = bytes[1] ^ 0x2e;
        uint16_t c = bytes[2] ^ 0x55;
        uint16_t ca = c - a;
        uint16_t d = ((a + b) << 8) ^ (ca << 5) ^ (c + b) ^ (0xcab + a);
        return d & HASH_MASK;
}

static inline uint16_t encode_match(size_t len, size_t offset)
{
        uint16_t code = 256;
        code |= MIN(len - 3, 15);
        code |= bitlen_nonzero_16(offset) << 4;
        return code;
}
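/*
 * Editor's worked example of encode_match() (an illustration, not part of
 * the original source): a match of length 7 at offset 20 becomes
 *
 *     code = 0x100                              (match flag: symbols 256-511)
 *          | MIN(7 - 3, 15)              = 4    (low nibble: length - 3)
 *          | bitlen_nonzero_16(20) << 4  = 0x40 (20 is 0b10100, MSB is bit 4)
 *          = 0x144
 *
 * The decoder reverses this: length = (code & 15) + 3, and the high nibble
 * says how many extra distance bits follow in the bitstream (here 4,
 * covering the distance range 16-31).
 */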
/*
 * debug_huffman_tree() uses debug_huffman_tree_print() to draw the Huffman
 * tree in ascii art.
 *
 * Note that the Huffman tree is probably not the same as that implied by the
 * canonical Huffman encoding that is finally used. That tree would be the
 * same shape, but with the left and right toggled to sort the branches by
 * length, after which the symbols for each length sorted by value.
 */
static void debug_huffman_tree_print(struct huffman_node *node,
                                     int *trail, int depth)
{
        if (node->left == NULL) {
                /* time to print a row */
                int j;
                bool branched = false;
                int row[17];
                char c[100];
                int s = node->symbol;
                char code[17];
                if (depth > 15) {
                        fprintf(stderr,
                                " \033[1;31m Max depth exceeded! (%d)\033[0m "
                                " symbol %#3x claimed depth %d count %d\n",
                                depth, node->symbol,
                                node->depth, node->count);
                        return;
                }
                for (j = depth - 1; j >= 0; j--) {
                        if (branched) {
                                if (trail[j] == -1) {
                                        row[j] = -3;
                                } else {
                                        row[j] = -2;
                                }
                        } else if (trail[j] == -1) {
                                row[j] = -1;
                                branched = true;
                        } else {
                                row[j] = trail[j];
                        }
                }
                for (j = 0; j < depth; j++) {
                        switch (row[j]) {
                        case -3:
                                code[j] = '1';
                                fprintf(stderr, "        ");
                                break;
                        case -2:
                                code[j] = '0';
                                fprintf(stderr, "     │  ");
                                break;
                        case -1:
                                code[j] = '1';
                                fprintf(stderr, "     ╰──");
                                break;
                        default:
                                code[j] = '0';
                                fprintf(stderr, "%5d─┬─", row[j]);
                                break;
                        }
                }
                code[depth] = 0;
                if (s < 32) {
                        snprintf(c, sizeof(c),
                                 "\033[1;32m%02x\033[0m \033[1;33m%c%c%c\033[0m",
                                 s,
                                 0xE2, 0x90, 0x80 + s); /* utf-8 for symbol */
                } else if (s < 127) {
                        snprintf(c, sizeof(c),
                                 "\033[1;32m%2x\033[0m '\033[10;32m%c\033[0m'",
                                 s, s);
                } else if (s < 256) {
                        snprintf(c, sizeof(c), "\033[1;32m%2x\033[0m", s);
                } else {
                        uint16_t len = (s & 15) + 3;
                        uint16_t dbits = ((s >> 4) & 15) + 1;
                        snprintf(c, sizeof(c),
                                 " \033[0;33mlen:%2d%s, "
                                 "dist:%d-%d \033[0m \033[1;32m%3x\033[0m%s",
                                 len,
                                 len == 18 ? "+" : "",
                                 1 << (dbits - 1),
                                 (1 << dbits) - 1,
                                 s,
                                 s == 256 ? " \033[1;31mEOF\033[0m" : "");
                }

                fprintf(stderr, "──%5d %s \033[2;37m%s\033[0m\n",
                        node->count, c, code);
                return;
        }
        trail[depth] = node->count;
        debug_huffman_tree_print(node->left, trail, depth + 1);
        trail[depth] = -1;
        debug_huffman_tree_print(node->right, trail, depth + 1);
}

/*
 * If DEBUG_HUFFMAN_TREE is defined true, debug_huffman_tree()
 * will print a tree looking something like this:
 *
 *  7─┬─── 3  len:18+, dist:1-1 10f          0
 *    ╰─ 4─┬─ 2─┬─── 1  61 'a'               100
 *         │    ╰─── 1  62 'b'               101
 *         ╰─ 2─┬─── 1  63 'c'               110
 *              ╰─── 1  len: 3, dist:1-1 100 EOF  111
 *
 * This is based off a Huffman root node, and the tree may not be the same as
 * the canonical tree.
 */
static void debug_huffman_tree(struct huffman_node *root)
{
        int trail[17];
        debug_huffman_tree_print(root, trail, 0);
}

/*
 * If DEBUG_HUFFMAN_TREE is defined true, debug_huffman_tree_from_table()
 * will print something like this based on a decoding symbol table.
 *
 *  Tree from decoding table 9 nodes → 5 codes
 *  10000─┬─── 5000  len:18+, dist:1-1 10f          0
 *        ╰─ 5000─┬─ 2500─┬─── 1250  61 'a'         100
 *                │       ╰─── 1250  62 'b'         101
 *                ╰─ 2500─┬─── 1250  63 'c'         110
 *                        ╰─── 1250  len: 3, dist:1-1 100 EOF  111
 *
 * This is the canonical form of the Huffman tree where the actual counts
 * aren't known (we use "10000" to help indicate relative frequencies).
 */
static void debug_huffman_tree_from_table(uint16_t *table)
{
        int trail[17];
        struct huffman_node nodes[1024] = {{0}};
        uint16_t codes[1024];
        size_t n = 1;
        size_t i = 0;
        codes[0] = 0;
        nodes[0].count = 10000;

        while (i < n) {
                uint16_t index = codes[i];
                struct huffman_node *node = &nodes[i];
                if (table[index] == 0xffff) {
                        /* internal node */
                        index <<= 1;
                        /* left */
                        index++;
                        codes[n] = index;
                        node->left = nodes + n;
                        nodes[n].count = node->count >> 1;
                        n++;
                        /* right */
                        index++;
                        codes[n] = index;
                        node->right = nodes + n;
                        nodes[n].count = node->count >> 1;
                        n++;
                } else {
                        /* leaf node */
                        node->symbol = table[index] & 511;
                }
                i++;
        }

        fprintf(stderr,
                "\033[1;34m Tree from decoding table\033[0m "
                "%zu nodes → %zu codes\n",
                n, (n + 1) / 2);
        debug_huffman_tree_print(nodes, trail, 0);
}

static bool depth_walk(struct huffman_node *n, uint32_t depth)
{
        bool ok;
        if (n->left == NULL) {
                /* this is a leaf, record the depth */
                n->depth = depth;
                return true;
        }
        if (depth > 14) {
                return false;
        }
        ok = (depth_walk(n->left, depth + 1) &&
              depth_walk(n->right, depth + 1));

        return ok;
}

static bool check_and_record_depths(struct huffman_node *root)
{
        return depth_walk(root, 0);
}

static bool encode_values(struct huffman_node *leaves,
                          size_t n_leaves,
                          uint16_t symbol_values[512])
{
        size_t i;
        /*
         * We have a leading 1 in our internal code representation, which
         * indicates the code length.
         */
        uint32_t code = 1;
        uint32_t code_len = 0;
        memset(symbol_values, 0, sizeof(uint16_t) * 512);
        for (i = 0; i < n_leaves; i++) {
                code <<= leaves[i].depth - code_len;
                code_len = leaves[i].depth;

                symbol_values[leaves[i].symbol] = code;
                code++;
        }
        /*
         * The last code should be 11111... with code_len + 1 ones. The final
         * code++ will wrap this round to 1000... with code_len + 1 zeroes.
         */
        if (code != 2 << code_len) {
                return false;
        }
        return true;
}
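/*
 * Editor's illustration of encode_values() (not in the original source):
 * suppose it receives five leaves, already sorted by (depth, symbol), with
 * depths 2, 2, 2, 3, 3. Starting from code = 1 (the implicit leading 1):
 *
 *     depth 2:  code <<= 2  ->  0b100   stored, then code++
 *     depth 2:  code = 0b101            stored, code++
 *     depth 2:  code = 0b110            stored, code++
 *     depth 3:  code <<= 1  ->  0b1110  stored, code++
 *     depth 3:  code = 0b1111           stored, code++
 *
 * Stripping the leading 1 gives the canonical codes 00, 01, 10, 110, 111.
 * After the final increment, code == 0b10000 == 2 << 3, which is exactly
 * the completeness check at the end of the function.
 */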
static int generate_huffman_codes(struct huffman_node *leaf_nodes,
                                  struct huffman_node *internal_nodes,
                                  uint16_t symbol_values[512])
{
        size_t head_leaf = 0;
        size_t head_branch = 0;
        size_t tail_branch = 0;
        struct huffman_node *huffman_root = NULL;
        size_t i, j;
        size_t n_leaves = 0;

        /*
         * Before we sort the nodes, we can eliminate the unused ones.
         */
        for (i = 0; i < 512; i++) {
                if (leaf_nodes[i].count) {
                        leaf_nodes[n_leaves] = leaf_nodes[i];
                        n_leaves++;
                }
        }
        if (n_leaves == 0) {
                return LZXPRESS_ERROR;
        }
        if (n_leaves == 1) {
                /*
                 * There is *almost* no way this should happen, and it would
                 * ruin the tree (because the shortest possible codes are 1
                 * bit long, and there are two of them).
                 *
                 * The only way to get here is in an internal block in a
                 * 3-or-more block message (i.e. > 128k), which consists
                 * entirely of a match starting in the previous block (if it
                 * was the end block, it would have the EOF symbol).
                 *
                 * What we do is add a dummy symbol which is this one XOR 256.
                 * It won't be used in the stream but will balance the tree.
                 */
                leaf_nodes[1] = leaf_nodes[0];
                leaf_nodes[1].symbol ^= 0x100;
                n_leaves = 2;
        }

        /* note, in sort we're using internal_nodes as auxiliary space */
        stable_sort(leaf_nodes,
                    internal_nodes,
                    n_leaves,
                    sizeof(struct huffman_node),
                    (samba_compare_fn_t)compare_huffman_node_count);

        /*
         * This outer loop is for re-quantizing the counts if the tree is too
         * tall (>15), which we need to do because the final encoding can't
         * express a tree that deep.
         *
         * In theory, this should be a 'while (true)' loop, but we chicken
         * out with 10 iterations, just in case.
         *
         * In practice it will almost always resolve in the first round; if
         * not then, in the second or third. Remember we're looking at 64k or
         * less, so the rarest we can have is 1 in 64k; each round of
         * quantization effectively doubles its frequency to 1 in 32k, 1 in
         * 16k, etc, until we're treating the rare symbol as actually quite
         * common.
         */
        for (j = 0; j < 10; j++) {
                bool less_than_15_bits;
                while (true) {
                        struct huffman_node *a = NULL;
                        struct huffman_node *b = NULL;
                        size_t leaf_len = n_leaves - head_leaf;
                        size_t internal_len = tail_branch - head_branch;

                        if (leaf_len + internal_len == 1) {
                                /*
                                 * We have the complete tree. The root will be
                                 * an internal node unless there is just one
                                 * symbol, which is already impossible.
                                 */
                                if (unlikely(leaf_len == 1)) {
                                        return LZXPRESS_ERROR;
                                } else {
                                        huffman_root =
                                                &internal_nodes[head_branch];
                                }
                                break;
                        }
                        /*
                         * We know here we have at least two nodes, and we
                         * want to select the two lowest scoring ones. Those
                         * have to be either a) the head of each queue, or b)
                         * the first two nodes of either queue.
                         *
                         * The complicating factors are: a) we need to check
                         * the length of each queue, and b) in the case of
                         * ties, we prefer to pair leaves with leaves.
                         *
                         * Note a complication we don't have: the leaf node
                         * queue never grows, and the subtree queue starts
                         * empty and cannot grow beyond n - 1. It feeds on
                         * itself. We don't need to think about overflow.
                         */
                        if (leaf_len == 0) {
                                /* two from subtrees */
                                a = &internal_nodes[head_branch];
                                b = &internal_nodes[head_branch + 1];
                                head_branch += 2;
                        } else if (internal_len == 0) {
                                /* two from nodes */
                                a = &leaf_nodes[head_leaf];
                                b = &leaf_nodes[head_leaf + 1];
                                head_leaf += 2;
                        } else if (leaf_len == 1 && internal_len == 1) {
                                /* one of each */
                                a = &leaf_nodes[head_leaf];
                                b = &internal_nodes[head_branch];
                                head_branch++;
                                head_leaf++;
                        } else {
                                /*
                                 * Take the lowest head, twice, checking for
                                 * length after taking the first one.
                                 */
                                if (leaf_nodes[head_leaf].count >
                                    internal_nodes[head_branch].count) {
                                        a = &internal_nodes[head_branch];
                                        head_branch++;
                                        if (internal_len == 1) {
                                                b = &leaf_nodes[head_leaf];
                                                head_leaf++;
                                                goto done;
                                        }
                                } else {
                                        a = &leaf_nodes[head_leaf];
                                        head_leaf++;
                                        if (leaf_len == 1) {
                                                b = &internal_nodes[head_branch];
                                                head_branch++;
                                                goto done;
                                        }
                                }
                                /* the other node */
                                if (leaf_nodes[head_leaf].count >
                                    internal_nodes[head_branch].count) {
                                        b = &internal_nodes[head_branch];
                                        head_branch++;
                                } else {
                                        b = &leaf_nodes[head_leaf];
                                        head_leaf++;
                                }
                        }
                done:
                        /*
                         * Now we add a new node to the subtrees list that
                         * combines the score of node_a and node_b, and points
                         * to them as children.
                         */
                        internal_nodes[tail_branch].count = a->count + b->count;
                        internal_nodes[tail_branch].left = a;
                        internal_nodes[tail_branch].right = b;
                        tail_branch++;
                        if (tail_branch == n_leaves) {
                                /*
                                 * We're not getting here, no way, never ever.
                                 * Unless we made a terrible mistake.
                                 *
                                 * That is, in a binary tree with n leaves,
                                 * there are ALWAYS n-1 internal nodes.
                                 */
                                return LZXPRESS_ERROR;
                        }
                }
                if (CHECK_DEBUGLVL(10) || DEBUG_HUFFMAN_TREE) {
                        debug_huffman_tree(huffman_root);
                }
                /*
                 * We have a tree, and need to turn it into a lookup table,
                 * and see if it is shallow enough (<= 15).
                 */
                less_than_15_bits = check_and_record_depths(huffman_root);
                if (less_than_15_bits) {
                        /*
                         * Now the leaf nodes know how deep they are, and we
                         * no longer need the internal nodes.
                         *
                         * We need to sort the nodes of equal depth, so that
                         * they are sorted by depth first, and symbol value
                         * second. The internal_nodes can again be auxiliary
                         * memory.
                         */
                        stable_sort(
                                leaf_nodes,
                                internal_nodes,
                                n_leaves,
                                sizeof(struct huffman_node),
                                (samba_compare_fn_t)compare_huffman_node_depth);

                        encode_values(leaf_nodes, n_leaves, symbol_values);
                        return n_leaves;
                }
                /*
                 * requantize by halving and rounding up, so that small counts
                 * become relatively bigger. This will lead to a flatter tree.
                 */
                for (i = 0; i < n_leaves; i++) {
                        leaf_nodes[i].count >>= 1;
                        leaf_nodes[i].count += 1;
                }
                head_leaf = 0;
                head_branch = 0;
                tail_branch = 0;
        }
        return LZXPRESS_ERROR;
}
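/*
 * Editor's trace of the two-queue construction above (an illustration, not
 * part of the original source). Suppose the sorted leaf counts are
 * c:1, d:1, b:2, a:5. Then:
 *
 *     step 1: pair c(1) and d(1)   -> internal node I1 (count 2)
 *     step 2: pair b(2) and I1(2)  -> internal node I2 (count 4)
 *             (on a tie between the queues, the leaf is taken first)
 *     step 3: pair a(5) and I2(4)  -> I3 (count 9), the root
 *
 * The depths come out as a:1, b:2, c:3, d:3, giving the canonical codes
 * 0, 10, 110, 111. Because both queues are always in sorted order, each
 * step only inspects the queue heads, so building the tree is O(n) once
 * the leaves are sorted.
 */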
/*
 * LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS is how far ahead to search in the
 * circular hash table for a match, before we give up. A bigger number will
 * generally lead to better but slower compression, but a stupidly big number
 * will just be worse.
 *
 * If you're fiddling with this, consider also fiddling with
 * LZX_HUFF_COMP_HASH_BITS.
 */
#define LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS 5

static inline void store_match(uint16_t *hash_table,
                               uint16_t h,
                               uint16_t offset)
{
        int i;
        uint16_t o = hash_table[h];
        uint16_t h2;
        uint16_t worst_h;
        int worst_score;

        if (o == 0xffff) {
                /* there is nothing there yet */
                hash_table[h] = offset;
                return;
        }
        for (i = 1; i < LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS; i++) {
                h2 = (h + i) & HASH_MASK;
                if (hash_table[h2] == 0xffff) {
                        hash_table[h2] = offset;
                        return;
                }
        }
        /*
         * There are no slots, but we really want to store this, so we'll kick
         * out the one with the longest distance.
         */
        worst_h = h;
        worst_score = offset - o;
        for (i = 1; i < LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS; i++) {
                int score;
                h2 = (h + i) & HASH_MASK;
                o = hash_table[h2];
                score = offset - o;
                if (score > worst_score) {
                        worst_score = score;
                        worst_h = h2;
                }
        }
        hash_table[worst_h] = offset;
}

/*
 * Yes, struct match looks a lot like a DATA_BLOB.
 */
struct match {
        const uint8_t *there;
        size_t length;
};

static inline struct match lookup_match(uint16_t *hash_table,
                                        uint16_t h,
                                        const uint8_t *data,
                                        const uint8_t *here,
                                        size_t max_len)
{
        int i;
        uint16_t o = hash_table[h];
        uint16_t h2;
        size_t len;
        const uint8_t *there = NULL;
        struct match best = {0};

        for (i = 0; i < LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS; i++) {
                h2 = (h + i) & HASH_MASK;
                o = hash_table[h2];
                if (o == 0xffff) {
                        /*
                         * in setting this, we would never have stepped over
                         * an 0xffff, so we won't now.
                         */
                        break;
                }
                there = data + o;
                if (here - there > 65534 || there > here) {
                        continue;
                }

                /*
                 * When we already have a long match, we can try to avoid
                 * measuring out another long, but shorter match.
                 */
                if (best.length > 1000 &&
                    there[best.length - 1] != best.there[best.length - 1]) {
                        continue;
                }

                for (len = 0;
                     len < max_len && here[len] == there[len];
                     len++) {
                        /* counting */
                }
                if (len > 2) {
                        /*
                         * As a tiebreaker, we prefer the closer match which
                         * is likely to encode smaller (and certainly no
                         * worse).
                         */
                        if (len > best.length ||
                            (len == best.length && there > best.there)) {
                                best.length = len;
                                best.there = there;
                        }
                }
        }
        return best;
}
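/*
 * Editor's illustration of the probing scheme above (assuming
 * LZX_HUFF_COMP_HASH_BITS is 14, so HASH_MASK == 0x3fff; that constant
 * lives in the header, not this file). A hash h == 0x3ffe probes the slots
 *
 *     0x3ffe, 0x3fff, 0x0000, 0x0001, 0x0002
 *
 * wrapping via `& HASH_MASK`. store_match() takes the first empty slot, or
 * failing that evicts the slot holding the smallest (most distant, least
 * useful) offset. lookup_match() walks the same five slots and may stop at
 * the first empty one, because store_match() never skips over an empty
 * slot when storing.
 */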
static ssize_t lz77_encode_block(struct lzxhuff_compressor_context *cmp_ctx,
                                 struct lzxhuff_compressor_mem *cmp_mem,
                                 uint16_t *hash_table,
                                 uint16_t *prev_hash_table)
{
        uint16_t *intermediate = cmp_mem->intermediate;
        struct huffman_node *leaf_nodes = cmp_mem->leaf_nodes;
        uint16_t *symbol_values = cmp_mem->symbol_values;
        size_t i, j, intermediate_len;
        const uint8_t *data = cmp_ctx->input_bytes + cmp_ctx->input_pos;
        const uint8_t *prev_block = NULL;
        size_t remaining_size = cmp_ctx->input_size - cmp_ctx->input_pos;
        size_t block_end = MIN(65536, remaining_size);
        struct match match;
        int n_symbols;

        if (cmp_ctx->input_size < cmp_ctx->input_pos) {
                return LZXPRESS_ERROR;
        }

        if (cmp_ctx->prev_block_pos != cmp_ctx->input_pos) {
                prev_block = cmp_ctx->input_bytes + cmp_ctx->prev_block_pos;
        } else if (prev_hash_table != NULL) {
                /* we've got confused! hash and block should go together */
                return LZXPRESS_ERROR;
        }

        /*
         * leaf_nodes is used to count the symbols seen, for later Huffman
         * encoding.
         */
        for (i = 0; i < 512; i++) {
                leaf_nodes[i] = (struct huffman_node) {
                        .symbol = i
                };
        }

        j = 0;

        if (remaining_size < 41 || DEBUG_NO_LZ77_MATCHES) {
                /*
                 * There is no point doing a hash table and looking for
                 * matches in this tiny block (remembering we are committed to
                 * using 32 bits, so there's a good chance we wouldn't even
                 * save a byte). The threshold of 41 matches Windows.
                 * If remaining_size < 3, we *can't* do the hash.
                 */
                i = 0;
        } else {
                /*
                 * We use 0xffff as the unset value for table, because it is
                 * not a valid match offset (and 0x0 is).
                 */
                memset(hash_table, 0xff, sizeof(cmp_mem->hash_table1));

                for (i = 0; i <= block_end - 3; i++) {
                        uint16_t code;
                        const uint8_t *here = data + i;
                        uint16_t h = three_byte_hash(here);
                        size_t max_len = MIN(remaining_size - i,
                                             MAX_MATCH_LENGTH);
                        match = lookup_match(hash_table,
                                             h,
                                             data,
                                             here,
                                             max_len);

                        if (match.there == NULL && prev_hash_table != NULL) {
                                /*
                                 * If this is not the first block,
                                 * backreferences can look into the previous
                                 * block (but only as far as 65535 bytes, so
                                 * the end of this block cannot see the start
                                 * of the last one).
                                 */
                                match = lookup_match(prev_hash_table,
                                                     h,
                                                     prev_block,
                                                     here,
                                                     remaining_size - i);
                        }

                        store_match(hash_table, h, i);

                        if (match.there == NULL) {
                                /* add a literal and move on. */
                                uint8_t c = data[i];
                                leaf_nodes[c].count++;
                                intermediate[j] = c;
                                j++;
                                continue;
                        }

                        /* a real match */
                        if (match.length <= 65538) {
                                intermediate[j] = 0xffff;
                                intermediate[j + 1] = match.length - 3;
                                intermediate[j + 2] = here - match.there;
                                j += 3;
                        } else {
                                size_t m = match.length - 3;
                                intermediate[j] = 0xfffe;
                                intermediate[j + 1] = m & 0xffff;
                                intermediate[j + 2] = m >> 16;
                                intermediate[j + 3] = here - match.there;
                                j += 4;
                        }
                        code = encode_match(match.length, here - match.there);
                        leaf_nodes[code].count++;
                        i += match.length - 1; /* `- 1` for the loop i++ */
                        /*
                         * A match can take us past the intended block length,
                         * extending the block. We don't need to do anything
                         * special for this case -- the loops will naturally
                         * do the right thing.
                         */
                }
        }
        /*
         * There might be some bytes at the end.
         */
        for (; i < block_end; i++) {
                leaf_nodes[data[i]].count++;
                intermediate[j] = data[i];
                j++;
        }

        if (i == remaining_size) {
                /* add a trailing EOF marker (256) */
                intermediate[j] = 0xffff;
                intermediate[j + 1] = 0;
                intermediate[j + 2] = 1;
                j += 3;
                leaf_nodes[256].count++;
        }

        intermediate_len = j;

        cmp_ctx->prev_block_pos = cmp_ctx->input_pos;
        cmp_ctx->input_pos += i;

        /* fill in the symbols table */
        n_symbols = generate_huffman_codes(leaf_nodes,
                                           cmp_mem->internal_nodes,
                                           symbol_values);
        if (n_symbols < 0) {
                return n_symbols;
        }

        return intermediate_len;
}
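/*
 * Editor's illustration of the intermediate encoding above (not part of the
 * original source): if matching were enabled for a block holding
 * "abcabcabc" (a real block this small stays below the 41 byte threshold),
 * the intermediate array would end up as
 *
 *     'a' 'b' 'c'   0xffff 3 3   0xffff 0 1
 *
 * that is, three literals; a match with length - 3 == 3 (a six byte match)
 * at distance 3; and the EOF marker, which travels as a zero-length match
 * at distance 1. Matches longer than 65538 bytes use the four-element
 * 0xfffe form instead, carrying a 32-bit length in two 16-bit halves.
 */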
static ssize_t write_huffman_table(uint16_t symbol_values[512],
                                   uint8_t *output,
                                   size_t available_size)
{
        size_t i;

        if (available_size < 256) {
                return LZXPRESS_ERROR;
        }

        for (i = 0; i < 256; i++) {
                uint8_t b = 0;
                uint16_t even = symbol_values[i * 2];
                uint16_t odd = symbol_values[i * 2 + 1];
                if (even != 0) {
                        b = bitlen_nonzero_16(even);
                }
                if (odd != 0) {
                        b |= bitlen_nonzero_16(odd) << 4;
                }
                output[i] = b;
        }
        return i;
}

struct write_context {
        uint8_t *dest;
        size_t dest_len;
        size_t head;              /* where lengths go */
        size_t next_code;         /* where symbol stream goes */
        size_t pending_next_code; /* will be next_code */
        unsigned bit_len;
        uint32_t bits;
};

/*
 * Write out 16 bits, little-endian, for write_compressed_bytes()
 *
 * As you'll notice, there's a bit to do.
 *
 * We are collecting up bits in a uint32_t, then when there are 16 of them we
 * write out a word into the stream, using a trio of offsets (wc->next_code,
 * wc->pending_next_code, and wc->head) which dance around ensuring that the
 * bitstream and the interspersed lengths are in the right places relative to
 * each other.
 */
static inline bool write_bits(struct write_context *wc,
                              uint16_t code, uint16_t length)
{
        wc->bits <<= length;
        wc->bits |= code;
        wc->bit_len += length;
        if (wc->bit_len > 16) {
                uint32_t w = wc->bits >> (wc->bit_len - 16);
                wc->bit_len -= 16;
                if (wc->next_code + 2 > wc->dest_len ||
                    unlikely(wc->bit_len > 16)) {
                        return false;
                }
                wc->dest[wc->next_code] = w & 0xff;
                wc->dest[wc->next_code + 1] = (w >> 8) & 0xff;
                wc->next_code = wc->pending_next_code;
                wc->pending_next_code = wc->head;
                wc->head += 2;
        }
        return true;
}

static inline bool write_code(struct write_context *wc, uint16_t code)
{
        int code_bit_len = bitlen_nonzero_16(code);
        if (unlikely(code == 0)) {
                return false;
        }
        code &= (1 << code_bit_len) - 1;
        return write_bits(wc, code, code_bit_len);
}

static inline bool write_byte(struct write_context *wc, uint8_t byte)
{
        if (wc->head + 1 > wc->dest_len) {
                return false;
        }
        wc->dest[wc->head] = byte;
        wc->head++;
        return true;
}

static inline bool write_long_len(struct write_context *wc, size_t len)
{
        if (len < 65535) {
                if (wc->head + 3 > wc->dest_len) {
                        return false;
                }
                wc->dest[wc->head] = 255;
                wc->dest[wc->head + 1] = len & 255;
                wc->dest[wc->head + 2] = len >> 8;
                wc->head += 3;
        } else {
                if (wc->head + 7 > wc->dest_len) {
                        return false;
                }
                wc->dest[wc->head] = 255;
                wc->dest[wc->head + 1] = 0;
                wc->dest[wc->head + 2] = 0;
                wc->dest[wc->head + 3] = len & 255;
                wc->dest[wc->head + 4] = (len >> 8) & 255;
                wc->dest[wc->head + 5] = (len >> 16) & 255;
                wc->dest[wc->head + 6] = (len >> 24) & 255;
                wc->head += 7;
        }
        return true;
}
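/*
 * Editor's illustration of the offset dance above (derived from the code,
 * not from the original comments). Starting from head = 4, next_code = 0,
 * pending_next_code = 2:
 *
 *     first flush of 16 bits -> word stored at offset 0;
 *                               next_code = 2, pending_next_code = 4,
 *                               head = 6 (slot 4-5 is now reserved)
 *     write_byte()           -> raw length byte stored at offset 6, head = 7
 *     second flush           -> word stored at offset 2;
 *                               next_code = 4, pending_next_code = 7,
 *                               head = 9 (slot 7-8 reserved)
 *
 * So code words land at offsets 0, 2, 4, 7, ... while raw length bytes slot
 * in between, matching the order in which the decoder will want them.
 */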
static ssize_t write_compressed_bytes(uint16_t symbol_values[512],
                                      uint16_t *intermediate,
                                      size_t intermediate_len,
                                      uint8_t *dest,
                                      size_t dest_len)
{
        bool ok;
        size_t i;
        size_t end;
        struct write_context wc = {
                .head = 4,
                .pending_next_code = 2,
                .dest = dest,
                .dest_len = dest_len
        };
        for (i = 0; i < intermediate_len; i++) {
                uint16_t c = intermediate[i];
                size_t len;
                uint16_t distance;
                uint16_t code_len = 0;
                uint16_t code_dist = 0;
                if (c < 256) {
                        ok = write_code(&wc, symbol_values[c]);
                        if (!ok) {
                                return LZXPRESS_ERROR;
                        }
                        continue;
                }

                if (c == 0xfffe) {
                        if (i > intermediate_len - 4) {
                                return LZXPRESS_ERROR;
                        }

                        len = intermediate[i + 1];
                        len |= (uint32_t)intermediate[i + 2] << 16;
                        distance = intermediate[i + 3];
                        i += 3;
                } else if (c == 0xffff) {
                        if (i > intermediate_len - 3) {
                                return LZXPRESS_ERROR;
                        }
                        len = intermediate[i + 1];
                        distance = intermediate[i + 2];
                        i += 2;
                } else {
                        return LZXPRESS_ERROR;
                }

                if (unlikely(distance == 0)) {
                        return LZXPRESS_ERROR;
                }
                /* len has already had 3 subtracted */
                if (len >= 15) {
                        /*
                         * We are going to need to write extra length
                         * bytes into the stream, but we don't do it
                         * now, we do it after the code has been
                         * written (and before the distance bits).
                         */
                        code_len = 15;
                } else {
                        code_len = len;
                }
                code_dist = bitlen_nonzero_16(distance);
                c = 256 | (code_dist << 4) | code_len;
                if (c > 511) {
                        return LZXPRESS_ERROR;
                }

                ok = write_code(&wc, symbol_values[c]);
                if (!ok) {
                        return LZXPRESS_ERROR;
                }

                if (code_len == 15) {
                        if (len >= 270) {
                                ok = write_long_len(&wc, len);
                        } else {
                                ok = write_byte(&wc, len - 15);
                        }
                        if (!ok) {
                                return LZXPRESS_ERROR;
                        }
                }
                if (code_dist != 0) {
                        uint16_t dist_bits = distance - (1 << code_dist);
                        ok = write_bits(&wc, dist_bits, code_dist);
                        if (!ok) {
                                return LZXPRESS_ERROR;
                        }
                }
        }

        /*
         * There are some intricacies around flushing the bits and returning
         * the length.
         *
         * If the returned length is not exactly right and there is another
         * block, that block will read its huffman table from the wrong place,
         * and have all the symbol codes out by a multiple of 4.
         */
        end = wc.head;
        if (wc.bit_len == 0) {
                end -= 2;
        }

        ok = write_bits(&wc, 0, 16 - wc.bit_len);
        if (!ok) {
                return LZXPRESS_ERROR;
        }
        for (i = 0; i < 2; i++) {
                /*
                 * Flush out the bits with zeroes. It doesn't matter if we do
                 * a round too many, as we have buffer space, and have already
                 * determined the returned length (end).
                 */
                ok = write_bits(&wc, 0, 16);
                if (!ok) {
                        return LZXPRESS_ERROR;
                }
        }

        return end;
}
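/*
 * Editor's note on the resulting block layout (a summary derived from the
 * code above, not from the original comments): each compressed block is
 *
 *     [256 bytes]  Huffman table (512 four-bit code lengths)
 *     [4+ bytes]   bitstream as 16-bit little-endian words, interleaved
 *                  with the raw bytes written by write_byte() and
 *                  write_long_len() for long match lengths
 *
 * and the bitstream is zero-padded so that a block always ends on a 16-bit
 * word boundary.
 */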
static ssize_t lzx_huffman_compress_block(
        struct lzxhuff_compressor_context *cmp_ctx,
        struct lzxhuff_compressor_mem *cmp_mem,
        size_t block_no)
{
        ssize_t intermediate_size;
        uint16_t *hash_table = NULL;
        uint16_t *back_window_hash_table = NULL;
        ssize_t bytes_written;

        if (cmp_ctx->available_size - cmp_ctx->output_pos < 260) {
                /* huffman block + 4 bytes */
                return LZXPRESS_ERROR;
        }

        /*
         * For LZ77 compression, we keep a hash table for the previous block,
         * via alternation after the first block.
         *
         * LZ77 writes into the intermediate buffer in the cmp_mem context.
         */
        if (block_no == 0) {
                hash_table = cmp_mem->hash_table1;
                back_window_hash_table = NULL;
        } else if (block_no & 1) {
                hash_table = cmp_mem->hash_table2;
                back_window_hash_table = cmp_mem->hash_table1;
        } else {
                hash_table = cmp_mem->hash_table1;
                back_window_hash_table = cmp_mem->hash_table2;
        }

        intermediate_size = lz77_encode_block(cmp_ctx,
                                              cmp_mem,
                                              hash_table,
                                              back_window_hash_table);

        if (intermediate_size < 0) {
                return intermediate_size;
        }

        /*
         * Write the 256 byte Huffman table, based on the counts gained in
         * LZ77 phase.
         */
        bytes_written = write_huffman_table(
                cmp_mem->symbol_values,
                cmp_ctx->output + cmp_ctx->output_pos,
                cmp_ctx->available_size - cmp_ctx->output_pos);

        if (bytes_written != 256) {
                return LZXPRESS_ERROR;
        }
        cmp_ctx->output_pos += 256;

        /*
         * Write the compressed bytes using the LZ77 matches and Huffman codes
         * worked out in the previous steps.
         */
        bytes_written = write_compressed_bytes(
                cmp_mem->symbol_values,
                cmp_mem->intermediate,
                intermediate_size,
                cmp_ctx->output + cmp_ctx->output_pos,
                cmp_ctx->available_size - cmp_ctx->output_pos);

        if (bytes_written < 0) {
                return bytes_written;
        }

        cmp_ctx->output_pos += bytes_written;
        return bytes_written;
}

/*
 * lzxpress_huffman_max_compressed_size()
 *
 * Return the most bytes the compression can take, to allow
 * pre-allocation.
 */
size_t lzxpress_huffman_max_compressed_size(size_t input_size)
{
        /*
         * In the worst case, the output size should be about the same as the
         * input size, plus the 256 byte header per 64k block. We aim for
         * ample, but within the order of magnitude.
         */
        return input_size + (input_size / 8) + 270;
}

/*
 * lzxpress_huffman_compress_talloc()
 *
 * This is the convenience function that allocates the compressor context and
 * output memory for you. The return value is the number of bytes written to
 * the location indicated by the output pointer.
 *
 * The maximum input_size is effectively around 227MB due to the need to guess
 * an upper bound on the output size that hits an internal limitation in
 * talloc.
 *
 * @param mem_ctx      TALLOC_CTX parent for the compressed buffer.
 * @param input_bytes  memory to be compressed.
 * @param input_size   length of the input buffer.
 * @param output       destination pointer for the compressed data.
 *
 * @return the number of bytes written or -1 on error.
 */
ssize_t lzxpress_huffman_compress_talloc(TALLOC_CTX *mem_ctx,
                                         const uint8_t *input_bytes,
                                         size_t input_size,
                                         uint8_t **output)
{
        struct lzxhuff_compressor_mem *cmp = NULL;
        size_t alloc_size = lzxpress_huffman_max_compressed_size(input_size);
        ssize_t output_size;

        *output = talloc_array(mem_ctx, uint8_t, alloc_size);
        if (*output == NULL) {
                return LZXPRESS_ERROR;
        }

        cmp = talloc(mem_ctx, struct lzxhuff_compressor_mem);
        if (cmp == NULL) {
                TALLOC_FREE(*output);
                return LZXPRESS_ERROR;
        }

        output_size = lzxpress_huffman_compress(cmp,
                                                input_bytes,
                                                input_size,
                                                *output,
                                                alloc_size);

        talloc_free(cmp);

        if (output_size < 0) {
                TALLOC_FREE(*output);
                return LZXPRESS_ERROR;
        }

        *output = talloc_realloc(mem_ctx, *output, uint8_t, output_size);
        if (*output == NULL) {
                return LZXPRESS_ERROR;
        }

        return output_size;
}
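/*
 * Editor's usage sketch for the convenience function above. This is
 * illustrative and not compiled; `mem_ctx`, `data` and `data_len` are
 * assumed to exist in the caller.
 */
#if 0
        uint8_t *compressed = NULL;
        ssize_t clen = lzxpress_huffman_compress_talloc(mem_ctx,
                                                        data, data_len,
                                                        &compressed);
        if (clen < 0) {
                /* compression failed; no buffer is left allocated */
                return -1;
        }
        /* use compressed[0 .. clen - 1], then talloc_free(compressed) */
#endif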
/*
 * lzxpress_huffman_compress()
 *
 * This is the inconvenience function, slightly faster and fiddlier than
 * lzxpress_huffman_compress_talloc().
 *
 * To use this, you need to have allocated (but not initialised) a `struct
 * lzxhuff_compressor_mem`, and an output buffer. If the buffer is not big
 * enough (per `available_size`), you'll get a negative return value,
 * otherwise the number of output bytes actually used, which will always be
 * at least 260.
 *
 * The `struct lzxhuff_compressor_mem` is reusable -- it is basically a
 * collection of uninitialised memory buffers. The total size is less than
 * 150k, so stack allocation is plausible.
 *
 * input_size and available_size are limited to the minimum of UINT32_MAX and
 * SSIZE_MAX. On 64 bit machines that will be UINT32_MAX, or 4GB.
 *
 * @param cmp_mem         a struct lzxhuff_compressor_mem.
 * @param input_bytes     memory to be compressed.
 * @param input_size      length of the input buffer.
 * @param output          destination for the compressed data.
 * @param available_size  allocated output bytes.
 *
 * @return the number of bytes written or -1 on error.
 */
ssize_t lzxpress_huffman_compress(struct lzxhuff_compressor_mem *cmp_mem,
                                  const uint8_t *input_bytes,
                                  size_t input_size,
                                  uint8_t *output,
                                  size_t available_size)
{
        size_t i = 0;
        struct lzxhuff_compressor_context cmp_ctx = {
                .input_bytes = input_bytes,
                .input_size = input_size,
                .input_pos = 0,
                .prev_block_pos = 0,
                .output = output,
                .available_size = available_size,
                .output_pos = 0
        };

        if (input_size == 0) {
                /*
                 * We can't deal with this for a number of reasons (e.g. it
                 * breaks the Huffman tree), and the output will be infinitely
                 * bigger than the input. The caller needs to go and think
                 * about what they're trying to do here.
                 */
                return LZXPRESS_ERROR;
        }

        if (input_size > SSIZE_MAX ||
            input_size > UINT32_MAX ||
            available_size > SSIZE_MAX ||
            available_size > UINT32_MAX ||
            available_size == 0) {
                /*
                 * We use negative ssize_t to return errors, which is limiting
                 * on 32 bit machines; otherwise we adhere to Microsoft's 4GB
                 * limit.
                 *
                 * lzxpress_huffman_compress_talloc() will not get this far,
                 * having already failed on talloc's 256 MB limit.
                 */
                return LZXPRESS_ERROR;
        }

        if (cmp_mem == NULL ||
            output == NULL ||
            input_bytes == NULL) {
                return LZXPRESS_ERROR;
        }

        while (cmp_ctx.input_pos < cmp_ctx.input_size) {
                ssize_t ret;
                ret = lzx_huffman_compress_block(&cmp_ctx,
                                                 cmp_mem,
                                                 i);
                if (ret < 0) {
                        return ret;
                }
                i++;
        }

        return cmp_ctx.output_pos;
}

static void debug_tree_codes(struct bitstream *input)
{
        size_t head = 0;
        size_t tail = 2;
        size_t ffff_count = 0;
        struct q {
                uint16_t tree_code;
                uint16_t code_code;
        };
        struct q queue[65536];
        char bits[17];
        uint16_t *t = input->table;
        queue[0].tree_code = 1;
        queue[0].code_code = 2;
        queue[1].tree_code = 2;
        queue[1].code_code = 3;
        while (head < tail) {
                struct q q = queue[head];
                uint16_t x = t[q.tree_code];
                if (x != 0xffff) {
                        int k;
                        uint16_t j = q.code_code;
                        size_t offset = bitlen_nonzero_16(j) - 1;
                        if (unlikely(j == 0)) {
                                DBG("BROKEN code is 0!\n");
                                return;
                        }

                        for (k = 0; k <= offset; k++) {
                                bool b = (j >> (offset - k)) & 1;
                                bits[k] = b ? '1' : '0';
                        }
                        bits[k] = 0;
                        DBG("%03x %s\n", x & 511, bits);
                        head++;
                        continue;
                }
                ffff_count++;
                queue[tail].tree_code = q.tree_code * 2 + 1;
                queue[tail].code_code = q.code_code * 2;
                tail++;
                queue[tail].tree_code = q.tree_code * 2 + 1 + 1;
                queue[tail].code_code = q.code_code * 2 + 1;
                tail++;
                head++;
        }
        DBG("0xffff count: %zu\n", ffff_count);
}

/**
 * Determines the sort order of one prefix_code_symbol relative to another
 */
static int compare_uint16(const uint16_t *a, const uint16_t *b)
{
        if (*a < *b) {
                return -1;
        }
        if (*a > *b) {
                return 1;
        }
        return 0;
}
static bool fill_decomp_table(struct bitstream *input)
{
        /*
         * There are 512 symbols, each encoded in 4 bits, which indicates
         * their depth in the Huffman tree. The even numbers get the lower
         * nibble of each byte, so that the byte hex values look backwards
         * (i.e. 0xab encodes b then a). These are allocated Huffman codes in
         * order of appearance, per depth.
         *
         * For example, if the first two bytes were:
         *
         * 0x23 0x53
         *
         * the first four codes have the lengths 3, 2, 3, 5.
         * Let's call them A, B, C, D.
         *
         * Suppose there is no other codeword with length 1 (which is
         * necessarily true in this example) or 2, but there might be others
         * of length 3 or 4. Then we can say this about the codes:
         *
         *        _ --*--_
         *      /          \
         *     0            1
         *    / \          / \
         *   0   1        0   1
         *   B   |\      / \  |\
         *      0  1    0   1 0 1
         *      A  C    |\ /| | |\
         *
         *   pos bits  code
         *   A    3    010
         *   B    2    00
         *   C    3    011
         *   D    5    1????
         *
         * B has the shortest code, so takes the leftmost branch, 00. That
         * ends the branch -- nothing else can start with 00. There are no
         * more 2s, so we look at the 3s, starting as far left as possible.
         * So A takes 010 and C takes 011. That means everything else has to
         * start with 1xx. We don't know how many codewords of length 3 or 4
         * there are; if there are none, D would end up with 10000, the
         * leftmost available code of length 5. If the compressor is any
         * good, there should be no unused leaf nodes left dangling at the
         * end.
         *
         * (this is "Canonical Huffman Coding").
         *
         *
         * But what symbols do these codes actually stand for?
         * ----------------------------------------------------
         *
         * Good question. The first 256 codes stand for the corresponding
         * literal bytes. The codes from 256 to 511 stand for LZ77 matches,
         * which have a distance and a length, encoded in a strange way that
         * isn't entirely the purview of this function.
         *
         * What does the value 0 mean?
         * ---------------------------
         *
         * The code does not occur. For example, if the next byte in the
         * example above was 0x07, that would give the byte 0x04 a 7-long
         * code, and no code to the 0x05 byte, which means there is no way
         * we are going to see a 5 in the decoded stream.
         *
         * Isn't LZ77 + Huffman what zip/gzip/zlib do?
         * -------------------------------------------
         *
         * Yes, DEFLATE is LZ77 + Huffman, but the details are quite
         * different.
         */
        uint16_t symbols[512];
        uint16_t sort_mem[512];
        size_t i, n_symbols;
        ssize_t code;
        uint16_t len = 0, prev_len;
        const uint8_t *table_bytes = input->bytes + input->byte_pos;

        if (input->byte_pos + 260 > input->byte_size) {
                return false;
        }

        n_symbols = 0;
        for (i = 0; i < 256; i++) {
                uint16_t even = table_bytes[i] & 15;
                uint16_t odd = table_bytes[i] >> 4;
                if (even != 0) {
                        symbols[n_symbols] = (even << 9) + i * 2;
                        n_symbols++;
                }
                if (odd != 0) {
                        symbols[n_symbols] = (odd << 9) + i * 2 + 1;
                        n_symbols++;
                }
        }
        input->byte_pos += 256;
        if (n_symbols == 0) {
                return false;
        }

        stable_sort(symbols, sort_mem, n_symbols, sizeof(uint16_t),
                    (samba_compare_fn_t)compare_uint16);

        /*
         * we're using an implicit binary tree, as you'd see in a heap.
         * table[0] = unused
         * table[1] = '0'
         * table[2] = '1'
         * table[3] = '00'     <-- '00' and '01' are children of '0'
         * table[4] = '01'     <-- '0' is [0], children are [0 * 2 + {1,2}]
         * table[5] = '10'
         * table[6] = '11'
         * table[7] = '000'
         * table[8] = '001'
         * table[9] = '010'
         * table[10]= '011'
         * table[11]= '100'
         *
         * table[1 << n - 1] = '0' * n
         * table[1 << n - 1 + x] = n-bit wide x (left padded with '0')
         * table[1 << n - 2] = '1' * (n - 1)
         *
         * table[i]->left = table[i*2 + 1]
         * table[i]->right = table[i*2 + 2]
         * table[0xffff] = unused (16 '0's, max len is 15)
         *
         * therefore e.g. table[70] = table[64 - 1 + 7]
         *                          = table[1 << 6 - 1 + 7]
         *                          = '000111' (binary 7, widened to 6 bits)
         *
         * and if '000111' is a code,
         * '00011', '0001', '000', '00', '0' are unavailable prefixes.
         *      34,     16,    7,    3,   1  are their indices
         * and (i - 1) >> 1 is the path back from 70 through these.
         *
         * the lookup is
         *
         * 1 start with i = 0
         * 2 extract a symbol bit (i = (i << 1) + bit + 1)
         * 3 is table[i] == 0xffff?
         * 4 yes -- goto 2
         * 5 no -- table[i] & 511 is the symbol, stop
         *
         * and the construction (here) is sort of the reverse.
         *
         * Most of this table is free space that can never be reached, and
         * most of the activity is at the beginning (since all codes start
         * there, and by design the shortest codes are the most common).
         */
        for (i = 0; i < 32; i++) {
                /* prefill the table head */
                input->table[i] = 0xffff;
        }
        code = -1;
        prev_len = 0;
        for (i = 0; i < n_symbols; i++) {
                uint16_t s = symbols[i];
                uint16_t prefix;
                len = (s >> 9) & 15;
                s &= 511;
                code++;
                while (len != prev_len) {
                        code <<= 1;
                        code++;
                        prev_len++;
                }

                if (code >= 65535) {
                        return false;
                }
                input->table[code] = s;
                for (prefix = (code - 1) >> 1;
                     prefix > 31;
                     prefix = (prefix - 1) >> 1) {
                        input->table[prefix] = 0xffff;
                }
        }
        if (CHECK_DEBUGLVL(10)) {
                debug_tree_codes(input);
        }

        /*
         * check that the last code encodes 11111..., with right number of
         * ones, pointing to the right symbol -- otherwise we have a dangling
         * uninitialised symbol.
         */
        if (code != (1 << (len + 1)) - 2) {
                return false;
        }
        return true;
}
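/*
 * Editor's sketch of how the table built above is consumed during decoding.
 * This mirrors the loop in lzx_huffman_decompress_block() below; `next_bit`
 * is a hypothetical helper, and the block is illustrative, not compiled.
 */
#if 0
        size_t index = 0;
        uint16_t symbol;
        do {
                int bit = next_bit(input);       /* MSB-first from LE words */
                index = (index << 1) + bit + 1;  /* step down the implicit tree */
        } while (input->table[index] == 0xffff); /* 0xffff marks internal nodes */
        symbol = input->table[index] & 511;      /* 0-255 literal, 256-511 match */
#endif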
#define CHECK_READ_32(dest)                                        \
        do {                                                       \
                if (input->byte_pos + 4 > input->byte_size) {      \
                        return LZXPRESS_ERROR;                     \
                }                                                  \
                dest = PULL_LE_U32(input->bytes, input->byte_pos); \
                input->byte_pos += 4;                              \
        } while (0)

#define CHECK_READ_16(dest)                                        \
        do {                                                       \
                if (input->byte_pos + 2 > input->byte_size) {      \
                        return LZXPRESS_ERROR;                     \
                }                                                  \
                dest = PULL_LE_U16(input->bytes, input->byte_pos); \
                input->byte_pos += 2;                              \
        } while (0)

#define CHECK_READ_8(dest)                                         \
        do {                                                       \
                if (input->byte_pos >= input->byte_size) {         \
                        return LZXPRESS_ERROR;                     \
                }                                                  \
                dest = PULL_LE_U8(input->bytes, input->byte_pos);  \
                input->byte_pos++;                                 \
        } while (0)

static inline ssize_t pull_bits(struct bitstream *input)
{
        if (input->byte_pos + 1 < input->byte_size) {
                uint16_t tmp;
                CHECK_READ_16(tmp);
                input->remaining_bits += 16;
                input->bits <<= 16;
                input->bits |= tmp;
        } else if (input->byte_pos < input->byte_size) {
                uint8_t tmp;
                CHECK_READ_8(tmp);
                input->remaining_bits += 8;
                input->bits <<= 8;
                input->bits |= tmp;
        } else {
                return LZXPRESS_ERROR;
        }
        return 0;
}
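/*
 * Editor's illustration of the bit order (not from the original source):
 * the bytes 0x78 0x56 are read by pull_bits() as the little-endian word
 * 0x5678, and the decoder then consumes bits from the most significant end
 * (0, 1, 0, 1, 0, 1, 1, 0, ...). So codewords run MSB-first within each
 * 16-bit word, while the words themselves are stored LSB-first.
 */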
/*
 * Decompress a block. The actual decompressed size is returned (or -1 on
 * error). The putative block length is 64k (or shorter, if the message ends
 * first), but a match can run over the end, extending the block. That's why
 * we need the overall output size as well as the block size. A match encoded
 * in this block can point back to previous blocks, but not before the
 * beginning of the message, so we also need the previously decoded size.
 *
 * The compressed block will have 256 bytes for the Huffman table, and at
 * least 4 bytes of (possibly padded) encoded values.
 */
static ssize_t lzx_huffman_decompress_block(struct bitstream *input,
                                            uint8_t *output,
                                            size_t block_size,
                                            size_t output_size,
                                            size_t previous_size)
{
        size_t output_pos = 0;
        uint16_t symbol;
        size_t index;
        uint16_t distance_bits_wanted = 0;
        size_t distance = 0;
        size_t length = 0;
        bool ok;
        uint32_t tmp;
        bool seen_eof_marker = false;

        ok = fill_decomp_table(input);
        if (!ok) {
                return LZXPRESS_ERROR;
        }
        if (CHECK_DEBUGLVL(10) || DEBUG_HUFFMAN_TREE) {
                debug_huffman_tree_from_table(input->table);
        }
        /*
         * Always read 32 bits at the start, even if we don't need them.
         */
        CHECK_READ_16(tmp);
        CHECK_READ_16(input->bits);
        input->bits |= tmp << 16;
        input->remaining_bits = 32;

        /*
         * This loop iterates over individual *bits*. These are read from
         * little-endian 16 bit words, most significant bit first.
         *
         * At points in the bitstream, the following are possible:
         *
         * # the source word is empty and needs to be refilled from the input
         *   stream.
         * # an incomplete codeword is being extended.
         * # a codeword is resolved, either as a literal or a match.
         * # a literal is written.
         * # a match is collecting distance bits.
         * # the output stream is copied, as specified by a match.
         * # input bytes are read for match lengths.
         *
         * Note that we *don't* specifically check for the EOF marker (symbol
         * 256) in this loop, because the precondition for stopping for the
         * EOF marker is that the output buffer is full (otherwise, you
         * wouldn't know which 256 is EOF, rather than an actual symbol), and
         * we *always* want to stop when the buffer is full. So we work out if
         * there is an EOF in another loop after we stop writing.
         */
        index = 0;
        while (output_pos < block_size) {
                uint16_t b;
                if (input->remaining_bits == 16) {
                        ssize_t ret = pull_bits(input);
                        if (ret) {
                                return ret;
                        }
                }
                input->remaining_bits--;

                b = (input->bits >> input->remaining_bits) & 1;
                if (length == 0) {
                        /* not in a match; pulling a codeword */
                        index <<= 1;
                        index += b + 1;
                        if (input->table[index] == 0xffff) {
                                /* incomplete codeword, the common case */
                                continue;
                        }
                        /* found the symbol, reset the code string */
                        symbol = input->table[index] & 511;
                        index = 0;
                        if (symbol < 256) {
                                /* a literal, the easy case */
                                output[output_pos] = symbol;
                                output_pos++;
                                continue;
                        }

                        /* the beginning of a match */
                        distance_bits_wanted = (symbol >> 4) & 15;
                        distance = 1 << distance_bits_wanted;
                        length = symbol & 15;
                        if (length == 15) {
                                CHECK_READ_8(tmp);
                                length += tmp;
                                if (length == 255 + 15) {
                                        /*
                                         * note, we discard (don't add) the
                                         * length so far.
                                         */
                                        CHECK_READ_16(length);
                                        if (length == 0) {
                                                CHECK_READ_32(length);
                                        }
                                }
                        }
                        length += 3;
                } else {
                        /* we are pulling extra distance bits */
                        distance_bits_wanted--;
                        distance |= b << distance_bits_wanted;
                }

                if (distance_bits_wanted == 0) {
                        /*
                         * We have a complete match, and it is time to do the
                         * copy (byte by byte, because the ranges can overlap,
                         * and we might need to copy bytes we just copied in).
                         *
                         * It is possible that this match will extend beyond
                         * the end of the expected block. That's fine, so long
                         * as it doesn't extend past the total output size.
                         */
                        size_t i;
                        size_t end = output_pos + length;
                        uint8_t *here = output + output_pos;
                        uint8_t *there = here - distance;
                        if (end > output_size ||
                            previous_size + output_pos < distance ||
                            unlikely(end < output_pos || there > here)) {
                                return LZXPRESS_ERROR;
                        }
                        for (i = 0; i < length; i++) {
                                here[i] = there[i];
                        }
                        output_pos += length;
                        distance = 0;
                        length = 0;
                }
        }

        if (length != 0 || index != 0) {
                /* it seems like we've hit an early end, mid-code */
                return LZXPRESS_ERROR;
        }

        if (input->byte_pos + 256 < input->byte_size) {
                /*
                 * This block is over, but it clearly isn't the last block, so
                 * we don't want to look for the EOF.
                 */
                return output_pos;
        }
        /*
         * We won't write any more, but we try to read some more to make sure
         * we're finishing in a good place. That means we want to see a 256
         * symbol and then some number of zeroes, possibly zero, but as many
         * as 32.
         *
         * In this we are perhaps a bit stricter than Windows, which
         * apparently does not insist on the EOF marker, nor on a lack of
         * trailing bytes.
         */
        while (true) {
                uint16_t b;
                if (input->remaining_bits == 16) {
                        ssize_t ret;
                        if (input->byte_pos == input->byte_size) {
                                /* FIN */
                                break;
                        }
                        ret = pull_bits(input);
                        if (ret) {
                                return ret;
                        }
                }
                input->remaining_bits--;

                b = (input->bits >> input->remaining_bits) & 1;
                if (seen_eof_marker) {
                        /*
                         * we have read an EOF symbol. Now we just want to
                         * see zeroes.
                         */
                        if (b != 0) {
                                return LZXPRESS_ERROR;
                        }
                        continue;
                }

                /* we're pulling in a symbol, which had better be 256 */
                index <<= 1;
                index += b + 1;
                if (input->table[index] == 0xffff) {
                        continue;
                }

                symbol = input->table[index] & 511;
                if (symbol != 256) {
                        return LZXPRESS_ERROR;
                }
                seen_eof_marker = true;
                continue;
        }
        if (!seen_eof_marker) {
                return LZXPRESS_ERROR;
        }

        return output_pos;
}

static ssize_t lzxpress_huffman_decompress_internal(struct bitstream *input,
                                                    uint8_t *output,
                                                    size_t output_size)
{
        size_t output_pos = 0;

        if (input->byte_size < 260) {
                return LZXPRESS_ERROR;
        }

        while (input->byte_pos < input->byte_size) {
                ssize_t block_output_pos;
                ssize_t block_output_size;
                size_t remaining_output_size = output_size - output_pos;

                block_output_size = MIN(65536, remaining_output_size);

                block_output_pos = lzx_huffman_decompress_block(
                        input,
                        output + output_pos,
                        block_output_size,
                        remaining_output_size,
                        output_pos);

                if (block_output_pos < block_output_size) {
                        return LZXPRESS_ERROR;
                }
                output_pos += block_output_pos;
                if (output_pos > output_size) {
                        /* not expecting to get here. */
                        return LZXPRESS_ERROR;
                }
        }

        if (input->byte_pos != input->byte_size) {
                return LZXPRESS_ERROR;
        }

        return output_pos;
}

/*
 * lzxpress_huffman_decompress()
 *
 * output_size must be the expected length of the decompressed data.
 * input_size and output_size are limited to the minimum of UINT32_MAX and
 * SSIZE_MAX. On 64 bit machines that will be UINT32_MAX, or 4GB.
 *
 * @param input_bytes  memory to be decompressed.
 * @param input_size   length of the compressed buffer.
 * @param output       destination for the decompressed data.
 * @param output_size  exact expected length of the decompressed data.
 *
 * @return the number of bytes written or -1 on error.
 */
ssize_t lzxpress_huffman_decompress(const uint8_t *input_bytes,
                                    size_t input_size,
                                    uint8_t *output,
                                    size_t output_size)
{
        uint16_t table[65536];
        struct bitstream input = {
                .bytes = input_bytes,
                .byte_size = input_size,
                .byte_pos = 0,
                .bits = 0,
                .remaining_bits = 0,
                .table = table
        };

        if (input_size > SSIZE_MAX ||
            input_size > UINT32_MAX ||
            output_size > SSIZE_MAX ||
            output_size > UINT32_MAX ||
            input_size == 0 ||
            output_size == 0 ||
            input_bytes == NULL ||
            output == NULL) {
                /*
                 * We use negative ssize_t to return errors, which is limiting
                 * on 32 bit machines, and the 4GB limit exists on Windows.
                 */
                return LZXPRESS_ERROR;
        }

        return lzxpress_huffman_decompress_internal(&input,
                                                    output,
                                                    output_size);
}

/**
 * lzxpress_huffman_decompress_talloc()
 *
 * The caller must provide the exact size of the expected output.
 *
 * The input_size is limited to the minimum of UINT32_MAX and SSIZE_MAX, but
 * output_size is limited to 256MB due to a limit in talloc. This effectively
 * limits input_size too, as non-crafted compressed data will not exceed the
 * decompressed size by very much.
 *
 * @param mem_ctx      TALLOC_CTX parent for the decompressed buffer.
 * @param input_bytes  memory to be decompressed.
 * @param input_size   length of the compressed buffer.
 * @param output_size  expected decompressed size.
 *
 * @return a talloc'ed buffer exactly output_size in length, or NULL.
 */
uint8_t *lzxpress_huffman_decompress_talloc(TALLOC_CTX *mem_ctx,
                                            const uint8_t *input_bytes,
                                            size_t input_size,
                                            size_t output_size)
{
        ssize_t result;
        uint8_t *output = NULL;
        struct bitstream input = {
                .bytes = input_bytes,
                .byte_size = input_size
        };

        output = talloc_array(mem_ctx, uint8_t, output_size);
        if (output == NULL) {
                return NULL;
        }

        input.table = talloc_array(mem_ctx, uint16_t, 65536);
        if (input.table == NULL) {
                talloc_free(output);
                return NULL;
        }
        result = lzxpress_huffman_decompress_internal(&input,
                                                      output,
                                                      output_size);
        talloc_free(input.table);

        if (result != output_size) {
                talloc_free(output);
                return NULL;
        }
        return output;
}
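/*
 * Editor's round-trip sketch of the public API (illustrative, not part of
 * the library; assumes a talloc context `mem_ctx` and an input buffer
 * `data` of `data_len` bytes exist in the caller):
 */
#if 0
        uint8_t *compressed = NULL;
        uint8_t *decompressed = NULL;
        ssize_t clen;

        clen = lzxpress_huffman_compress_talloc(mem_ctx, data, data_len,
                                                &compressed);
        if (clen < 0) {
                return -1;
        }
        /* the decompressor needs the exact original length */
        decompressed = lzxpress_huffman_decompress_talloc(mem_ctx,
                                                          compressed,
                                                          clen,
                                                          data_len);
        if (decompressed == NULL ||
            memcmp(decompressed, data, data_len) != 0) {
                return -1;
        }
#endif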