diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 20:34:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 20:34:10 +0000 |
commit | e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc (patch) | |
tree | 68cb5ef9081156392f1dd62a00c6ccc1451b93df /epan/tvbuff_lz77huff.c | |
parent | Initial commit. (diff) | |
download | wireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.tar.xz wireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.zip |
Adding upstream version 4.2.2.upstream/4.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'epan/tvbuff_lz77huff.c')
-rw-r--r-- | epan/tvbuff_lz77huff.c | 423 |
1 files changed, 423 insertions, 0 deletions
diff --git a/epan/tvbuff_lz77huff.c b/epan/tvbuff_lz77huff.c new file mode 100644 index 00000000..7c8de27e --- /dev/null +++ b/epan/tvbuff_lz77huff.c @@ -0,0 +1,423 @@ +/* + * Decompression code for LZ77+Huffman. This encoding is used by + * Microsoft in various file formats and protocols including SMB3. + * + * See MS-XCA. + * + * Initial code from Samba re-licensed with Samuel's permission. + * Copyright (C) Samuel Cabrero 2017 + * + * Glib-ification, extra error-checking and WS integration + * Copyright (C) Aurélien Aptel 2019 + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 1998 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <glib.h> +#include <stdlib.h> /* qsort */ +#include <epan/exceptions.h> +#include <epan/tvbuff.h> +#include <epan/wmem_scopes.h> + +#define MAX_INPUT_SIZE (16*1024*1024) /* 16MB */ + +#define TREE_SIZE 1024 +#define ENCODED_TREE_SIZE 256 +#define SYMBOL_INFO_SIZE (2*ENCODED_TREE_SIZE) + +struct input { + tvbuff_t *tvb; + int offset; + gsize size; +}; + +/** + * Represents a node in a Huffman prefix code tree + */ +struct prefix_code_node { + /* Stores the symbol encoded by this node in the prefix code tree */ + guint16 symbol; + + /* Indicates whether this node is a leaf in the tree */ + guint8 leaf; + + /* + * Points to the node's two children. Values are indexes in + * the tree node array. The value -1 is used to indicate that + * a particular child does not exist + */ + gint16 child[2]; +}; + +/** + * Represent information about a Huffman-encoded symbol + */ +struct prefix_code_symbol { + /* Stores the symbol */ + guint16 symbol; + + /* Stores the symbol’s Huffman prefix code length */ + guint16 length; +}; + +/** + * Represent a byte array as a bit string from which individual bits can + * be read + */ +struct bitstring { + /* The byte array */ + const struct input *input; + + /* The index in source from which the next set of bits will be pulled + * when the bits in mask have been consumed */ + guint32 bitstring_index; + + /* Stores the next bits to be consumed in the bit string */ + guint32 mask; + + /* Stores the number of bits in mask that remain to be consumed */ + gint32 bits; +}; + +struct hf_tree { + struct prefix_code_node *root; + struct prefix_code_node nodes[TREE_SIZE]; +}; + +static gboolean is_node_valid(struct hf_tree *tree, struct prefix_code_node *node) +{ + return (node && node >= tree->nodes && node < tree->nodes + TREE_SIZE); +} + +/** + * Links a symbol's prefix_code_node into its correct position in a Huffman + * prefix code tree + */ +static int prefix_code_tree_add_leaf(struct hf_tree *tree, + guint32 leaf_index, + guint32 mask, + guint32 bits, + guint32 *out_index) +{ + struct prefix_code_node *node = &tree->nodes[0]; + guint32 i = leaf_index + 1; + guint32 child_index; + + if (leaf_index >= TREE_SIZE) + return -1; + + while (bits > 1) { + bits = bits - 1; + child_index = (mask >> bits) & 1; + if (node->child[child_index] < 0) { + if (i >= TREE_SIZE) + return -1; + node->child[child_index] = i; + tree->nodes[i].leaf = FALSE; + i = i + 1; + } + node = tree->nodes + node->child[child_index]; + if (!is_node_valid(tree, node)) + return -1; + } + + node->child[mask & 1] = leaf_index; + + *out_index = i; + return 0; +} + +/** + * Determines the sort order of one prefix_code_symbol relative to another + */ +static int compare_symbols(const void *ve1, const void *ve2) +{ + const struct prefix_code_symbol *e1 = (const struct prefix_code_symbol *)ve1; + const struct prefix_code_symbol *e2 = (const struct prefix_code_symbol *)ve2; + + if (e1->length < e2->length) + return -1; + else if (e1->length > e2->length) + return 1; + else if (e1->symbol < e2->symbol) + return -1; + else if (e1->symbol > e2->symbol) + return 1; + else + return 0; +} + +/** + * Rebuilds the Huffman prefix code tree that will be used to decode symbols + * during decompression + */ +static int PrefixCodeTreeRebuild( struct hf_tree *tree, + const struct input *input) +{ + struct prefix_code_symbol symbolInfo[SYMBOL_INFO_SIZE]; + guint32 i, j, mask, bits; + int rc; + + for (i = 0; i < TREE_SIZE; i++) { + tree->nodes[i].symbol = 0; + tree->nodes[i].leaf = FALSE; + tree->nodes[i].child[0] = -1; + tree->nodes[i].child[1] = -1; + } + + if (input->size < ENCODED_TREE_SIZE) + return -1; + + for (i = 0; i < ENCODED_TREE_SIZE; i++) { + symbolInfo[2*i].symbol = 2*i; + symbolInfo[2*i].length = tvb_get_guint8(input->tvb, input->offset+i) & 15; + symbolInfo[2*i+1].symbol = 2*i+1; + symbolInfo[2*i+1].length = tvb_get_guint8(input->tvb, input->offset+i) >> 4; + } + + qsort(symbolInfo, SYMBOL_INFO_SIZE, sizeof(symbolInfo[0]), compare_symbols); + + i = 0; + while (i < SYMBOL_INFO_SIZE && symbolInfo[i].length == 0) { + i = i + 1; + } + + mask = 0; + bits = 1; + + tree->root = &tree->nodes[0]; + tree->root->leaf = FALSE; + + j = 1; + for (; i < 512; i++) { + //ws_assert(j < TREE_SIZE); + if (j >= TREE_SIZE) { + return -1; + } + tree->nodes[j].symbol = symbolInfo[i].symbol; + tree->nodes[j].leaf = TRUE; + mask <<= symbolInfo[i].length - bits; + bits = symbolInfo[i].length; + rc = prefix_code_tree_add_leaf(tree, j, mask, bits, &j); + if (rc) + return rc; + mask += 1; + } + + return 0; +} + +/** + * Initializes a bitstream data structure + */ +static void bitstring_init(struct bitstring *bstr, + const struct input *input, + guint32 bitstring_index) +{ + bstr->mask = tvb_get_letohs(input->tvb, input->offset+bitstring_index); + bstr->mask <<= sizeof(bstr->mask) * 8 - 16; + bitstring_index += 2; + + bstr->mask += tvb_get_letohs(input->tvb, input->offset+bitstring_index); + bitstring_index += 2; + + bstr->bits = 32; + bstr->input = input; + bstr->bitstring_index = bitstring_index; +} + +/** + * Returns the next n bits from the front of a bit string. + */ +static guint32 bitstring_lookup(struct bitstring *bstr, guint32 n) +{ + if (n == 0 || bstr->bits < 0 || n > (guint32)bstr->bits) { + return 0; + } + return bstr->mask >> (sizeof(bstr->mask) * 8 - n); +} + +/** + * Advances the bit string's cursor by n bits. + */ +static void bitstring_skip(struct bitstring *bstr, guint32 n) +{ + bstr->mask = bstr->mask << n; + bstr->bits = bstr->bits - n; + + if (bstr->bits < 16) { + bstr->mask += tvb_get_letohs(bstr->input->tvb, + bstr->input->offset + bstr->bitstring_index) + << (16 - bstr->bits); + bstr->bitstring_index = bstr->bitstring_index + 2; + bstr->bits = bstr->bits + 16; + } +} + +/** + * Returns the symbol encoded by the next prefix code in a bit string. + */ +static int prefix_code_tree_decode_symbol(struct hf_tree *tree, + struct bitstring *bstr, + guint32 *out_symbol) +{ + guint32 bit; + struct prefix_code_node *node = tree->root; + + do { + bit = bitstring_lookup(bstr, 1); + bitstring_skip(bstr, 1); + node = tree->nodes + node->child[bit]; + if (!is_node_valid(tree, node)) + return -1; + } while (node->leaf == FALSE); + + *out_symbol = node->symbol; + return 0; +} + +static gboolean do_uncompress(struct input *input, + wmem_array_t *obuf) +{ + guint32 symbol; + guint32 length; + gint32 match_offset; + int rc; + struct hf_tree tree = {0}; + struct bitstring bstr = {0}; + + if (!input->tvb) + return FALSE; + + if (!input->size || input->size > MAX_INPUT_SIZE) + return FALSE; + + rc = PrefixCodeTreeRebuild(&tree, input); + if (rc) + return FALSE; + + bitstring_init(&bstr, input, ENCODED_TREE_SIZE); + + while (1) { + rc = prefix_code_tree_decode_symbol(&tree, &bstr, &symbol); + if (rc < 0) + return FALSE; + + if (symbol < 256) { + guint8 v = symbol & 0xFF; + wmem_array_append_one(obuf, v); + } else { + if (symbol == 256) { + /* EOF symbol */ + return bstr.bitstring_index == bstr.input->size; + } + symbol = symbol - 256; + length = symbol & 0xF; + symbol = symbol >> 4; + + match_offset = (1U << symbol) + bitstring_lookup(&bstr, symbol); + match_offset *= -1; + + if (length == 15) { + if (bstr.bitstring_index >= bstr.input->size) + return FALSE; + length = tvb_get_guint8(bstr.input->tvb, + bstr.input->offset+bstr.bitstring_index) + 15; + bstr.bitstring_index += 1; + + if (length == 270) { + if (bstr.bitstring_index+1 >= bstr.input->size) + return FALSE; + length = tvb_get_letohs(bstr.input->tvb, bstr.input->offset+bstr.bitstring_index); + bstr.bitstring_index += 2; + } + } + + bitstring_skip(&bstr, symbol); + + length += 3; + do { + guint8 byte; + guint elem_count = wmem_array_get_count(obuf)+match_offset; + + if (wmem_array_try_index(obuf, elem_count, &byte)) + return FALSE; + wmem_array_append_one(obuf, byte); + length--; + } while (length != 0); + } + } + return TRUE; +} + +tvbuff_t * +tvb_uncompress_lz77huff(tvbuff_t *tvb, + const int offset, + int input_size) +{ + volatile gboolean ok; + wmem_allocator_t *pool; + wmem_array_t *obuf; + tvbuff_t *out; + struct input input = { + .tvb = tvb, + .offset = offset, + .size = input_size + }; + + pool = wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE); + obuf = wmem_array_sized_new(pool, 1, input_size*2); + + TRY { + ok = do_uncompress(&input, obuf); + } CATCH_ALL { + ok = FALSE; + } + ENDTRY; + + if (ok) { + /* + * Cannot pass a tvb free callback that frees the wmem + * pool, so we make an extra copy that uses bare + * pointers. This could be optimized if tvb API had a + * free pool callback of some sort. + */ + guint size = wmem_array_get_count(obuf); + guint8 *p = (guint8 *)g_malloc(size); + memcpy(p, wmem_array_get_raw(obuf), size); + out = tvb_new_real_data(p, size, size); + tvb_set_free_cb(out, g_free); + } else { + out = NULL; + } + + wmem_destroy_allocator(pool); + + return out; +} + +tvbuff_t * +tvb_child_uncompress_lz77huff(tvbuff_t *parent, tvbuff_t *tvb, const int offset, int in_size) +{ + tvbuff_t *new_tvb = tvb_uncompress_lz77huff(tvb, offset, in_size); + if (new_tvb) + tvb_set_child_real_data_tvbuff(parent, new_tvb); + return new_tvb; +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 8 + * tab-width: 8 + * indent-tabs-mode: t + * End: + * + * vi: set shiftwidth=8 tabstop=8 noexpandtab: + * :indentSize=8:tabSize=8:noTabs=false: + */ |