summaryrefslogtreecommitdiffstats
path: root/epan/tvbuff_lz77huff.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 20:34:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-10 20:34:10 +0000
commite4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc (patch)
tree68cb5ef9081156392f1dd62a00c6ccc1451b93df /epan/tvbuff_lz77huff.c
parentInitial commit. (diff)
downloadwireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.tar.xz
wireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.zip
Adding upstream version 4.2.2.upstream/4.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'epan/tvbuff_lz77huff.c')
-rw-r--r--epan/tvbuff_lz77huff.c423
1 files changed, 423 insertions, 0 deletions
diff --git a/epan/tvbuff_lz77huff.c b/epan/tvbuff_lz77huff.c
new file mode 100644
index 00000000..7c8de27e
--- /dev/null
+++ b/epan/tvbuff_lz77huff.c
@@ -0,0 +1,423 @@
+/*
+ * Decompression code for LZ77+Huffman. This encoding is used by
+ * Microsoft in various file formats and protocols including SMB3.
+ *
+ * See MS-XCA.
+ *
+ * Initial code from Samba re-licensed with Samuel's permission.
+ * Copyright (C) Samuel Cabrero 2017
+ *
+ * Glib-ification, extra error-checking and WS integration
+ * Copyright (C) Aurélien Aptel 2019
+ *
+ * Wireshark - Network traffic analyzer
+ * By Gerald Combs <gerald@wireshark.org>
+ * Copyright 1998 Gerald Combs
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <glib.h>
+#include <stdlib.h> /* qsort */
+#include <epan/exceptions.h>
+#include <epan/tvbuff.h>
+#include <epan/wmem_scopes.h>
+
+#define MAX_INPUT_SIZE (16*1024*1024) /* 16MB */
+
+#define TREE_SIZE 1024
+#define ENCODED_TREE_SIZE 256
+#define SYMBOL_INFO_SIZE (2*ENCODED_TREE_SIZE)
+
+struct input {
+ tvbuff_t *tvb;
+ int offset;
+ gsize size;
+};
+
+/**
+ * Represents a node in a Huffman prefix code tree
+ */
+struct prefix_code_node {
+ /* Stores the symbol encoded by this node in the prefix code tree */
+ guint16 symbol;
+
+ /* Indicates whether this node is a leaf in the tree */
+ guint8 leaf;
+
+ /*
+ * Points to the node's two children. Values are indexes in
+ * the tree node array. The value -1 is used to indicate that
+ * a particular child does not exist
+ */
+ gint16 child[2];
+};
+
+/**
+ * Represent information about a Huffman-encoded symbol
+ */
+struct prefix_code_symbol {
+ /* Stores the symbol */
+ guint16 symbol;
+
+ /* Stores the symbol’s Huffman prefix code length */
+ guint16 length;
+};
+
+/**
+ * Represent a byte array as a bit string from which individual bits can
+ * be read
+ */
+struct bitstring {
+ /* The byte array */
+ const struct input *input;
+
+ /* The index in source from which the next set of bits will be pulled
+ * when the bits in mask have been consumed */
+ guint32 bitstring_index;
+
+ /* Stores the next bits to be consumed in the bit string */
+ guint32 mask;
+
+ /* Stores the number of bits in mask that remain to be consumed */
+ gint32 bits;
+};
+
+struct hf_tree {
+ struct prefix_code_node *root;
+ struct prefix_code_node nodes[TREE_SIZE];
+};
+
+static gboolean is_node_valid(struct hf_tree *tree, struct prefix_code_node *node)
+{
+ return (node && node >= tree->nodes && node < tree->nodes + TREE_SIZE);
+}
+
+/**
+ * Links a symbol's prefix_code_node into its correct position in a Huffman
+ * prefix code tree
+ */
+static int prefix_code_tree_add_leaf(struct hf_tree *tree,
+ guint32 leaf_index,
+ guint32 mask,
+ guint32 bits,
+ guint32 *out_index)
+{
+ struct prefix_code_node *node = &tree->nodes[0];
+ guint32 i = leaf_index + 1;
+ guint32 child_index;
+
+ if (leaf_index >= TREE_SIZE)
+ return -1;
+
+ while (bits > 1) {
+ bits = bits - 1;
+ child_index = (mask >> bits) & 1;
+ if (node->child[child_index] < 0) {
+ if (i >= TREE_SIZE)
+ return -1;
+ node->child[child_index] = i;
+ tree->nodes[i].leaf = FALSE;
+ i = i + 1;
+ }
+ node = tree->nodes + node->child[child_index];
+ if (!is_node_valid(tree, node))
+ return -1;
+ }
+
+ node->child[mask & 1] = leaf_index;
+
+ *out_index = i;
+ return 0;
+}
+
+/**
+ * Determines the sort order of one prefix_code_symbol relative to another
+ */
+static int compare_symbols(const void *ve1, const void *ve2)
+{
+ const struct prefix_code_symbol *e1 = (const struct prefix_code_symbol *)ve1;
+ const struct prefix_code_symbol *e2 = (const struct prefix_code_symbol *)ve2;
+
+ if (e1->length < e2->length)
+ return -1;
+ else if (e1->length > e2->length)
+ return 1;
+ else if (e1->symbol < e2->symbol)
+ return -1;
+ else if (e1->symbol > e2->symbol)
+ return 1;
+ else
+ return 0;
+}
+
+/**
+ * Rebuilds the Huffman prefix code tree that will be used to decode symbols
+ * during decompression
+ */
+static int PrefixCodeTreeRebuild( struct hf_tree *tree,
+ const struct input *input)
+{
+ struct prefix_code_symbol symbolInfo[SYMBOL_INFO_SIZE];
+ guint32 i, j, mask, bits;
+ int rc;
+
+ for (i = 0; i < TREE_SIZE; i++) {
+ tree->nodes[i].symbol = 0;
+ tree->nodes[i].leaf = FALSE;
+ tree->nodes[i].child[0] = -1;
+ tree->nodes[i].child[1] = -1;
+ }
+
+ if (input->size < ENCODED_TREE_SIZE)
+ return -1;
+
+ for (i = 0; i < ENCODED_TREE_SIZE; i++) {
+ symbolInfo[2*i].symbol = 2*i;
+ symbolInfo[2*i].length = tvb_get_guint8(input->tvb, input->offset+i) & 15;
+ symbolInfo[2*i+1].symbol = 2*i+1;
+ symbolInfo[2*i+1].length = tvb_get_guint8(input->tvb, input->offset+i) >> 4;
+ }
+
+ qsort(symbolInfo, SYMBOL_INFO_SIZE, sizeof(symbolInfo[0]), compare_symbols);
+
+ i = 0;
+ while (i < SYMBOL_INFO_SIZE && symbolInfo[i].length == 0) {
+ i = i + 1;
+ }
+
+ mask = 0;
+ bits = 1;
+
+ tree->root = &tree->nodes[0];
+ tree->root->leaf = FALSE;
+
+ j = 1;
+ for (; i < 512; i++) {
+ //ws_assert(j < TREE_SIZE);
+ if (j >= TREE_SIZE) {
+ return -1;
+ }
+ tree->nodes[j].symbol = symbolInfo[i].symbol;
+ tree->nodes[j].leaf = TRUE;
+ mask <<= symbolInfo[i].length - bits;
+ bits = symbolInfo[i].length;
+ rc = prefix_code_tree_add_leaf(tree, j, mask, bits, &j);
+ if (rc)
+ return rc;
+ mask += 1;
+ }
+
+ return 0;
+}
+
+/**
+ * Initializes a bitstream data structure
+ */
+static void bitstring_init(struct bitstring *bstr,
+ const struct input *input,
+ guint32 bitstring_index)
+{
+ bstr->mask = tvb_get_letohs(input->tvb, input->offset+bitstring_index);
+ bstr->mask <<= sizeof(bstr->mask) * 8 - 16;
+ bitstring_index += 2;
+
+ bstr->mask += tvb_get_letohs(input->tvb, input->offset+bitstring_index);
+ bitstring_index += 2;
+
+ bstr->bits = 32;
+ bstr->input = input;
+ bstr->bitstring_index = bitstring_index;
+}
+
+/**
+ * Returns the next n bits from the front of a bit string.
+ */
+static guint32 bitstring_lookup(struct bitstring *bstr, guint32 n)
+{
+ if (n == 0 || bstr->bits < 0 || n > (guint32)bstr->bits) {
+ return 0;
+ }
+ return bstr->mask >> (sizeof(bstr->mask) * 8 - n);
+}
+
+/**
+ * Advances the bit string's cursor by n bits.
+ */
+static void bitstring_skip(struct bitstring *bstr, guint32 n)
+{
+ bstr->mask = bstr->mask << n;
+ bstr->bits = bstr->bits - n;
+
+ if (bstr->bits < 16) {
+ bstr->mask += tvb_get_letohs(bstr->input->tvb,
+ bstr->input->offset + bstr->bitstring_index)
+ << (16 - bstr->bits);
+ bstr->bitstring_index = bstr->bitstring_index + 2;
+ bstr->bits = bstr->bits + 16;
+ }
+}
+
+/**
+ * Returns the symbol encoded by the next prefix code in a bit string.
+ */
+static int prefix_code_tree_decode_symbol(struct hf_tree *tree,
+ struct bitstring *bstr,
+ guint32 *out_symbol)
+{
+ guint32 bit;
+ struct prefix_code_node *node = tree->root;
+
+ do {
+ bit = bitstring_lookup(bstr, 1);
+ bitstring_skip(bstr, 1);
+ node = tree->nodes + node->child[bit];
+ if (!is_node_valid(tree, node))
+ return -1;
+ } while (node->leaf == FALSE);
+
+ *out_symbol = node->symbol;
+ return 0;
+}
+
+static gboolean do_uncompress(struct input *input,
+ wmem_array_t *obuf)
+{
+ guint32 symbol;
+ guint32 length;
+ gint32 match_offset;
+ int rc;
+ struct hf_tree tree = {0};
+ struct bitstring bstr = {0};
+
+ if (!input->tvb)
+ return FALSE;
+
+ if (!input->size || input->size > MAX_INPUT_SIZE)
+ return FALSE;
+
+ rc = PrefixCodeTreeRebuild(&tree, input);
+ if (rc)
+ return FALSE;
+
+ bitstring_init(&bstr, input, ENCODED_TREE_SIZE);
+
+ while (1) {
+ rc = prefix_code_tree_decode_symbol(&tree, &bstr, &symbol);
+ if (rc < 0)
+ return FALSE;
+
+ if (symbol < 256) {
+ guint8 v = symbol & 0xFF;
+ wmem_array_append_one(obuf, v);
+ } else {
+ if (symbol == 256) {
+ /* EOF symbol */
+ return bstr.bitstring_index == bstr.input->size;
+ }
+ symbol = symbol - 256;
+ length = symbol & 0xF;
+ symbol = symbol >> 4;
+
+ match_offset = (1U << symbol) + bitstring_lookup(&bstr, symbol);
+ match_offset *= -1;
+
+ if (length == 15) {
+ if (bstr.bitstring_index >= bstr.input->size)
+ return FALSE;
+ length = tvb_get_guint8(bstr.input->tvb,
+ bstr.input->offset+bstr.bitstring_index) + 15;
+ bstr.bitstring_index += 1;
+
+ if (length == 270) {
+ if (bstr.bitstring_index+1 >= bstr.input->size)
+ return FALSE;
+ length = tvb_get_letohs(bstr.input->tvb, bstr.input->offset+bstr.bitstring_index);
+ bstr.bitstring_index += 2;
+ }
+ }
+
+ bitstring_skip(&bstr, symbol);
+
+ length += 3;
+ do {
+ guint8 byte;
+ guint elem_count = wmem_array_get_count(obuf)+match_offset;
+
+ if (wmem_array_try_index(obuf, elem_count, &byte))
+ return FALSE;
+ wmem_array_append_one(obuf, byte);
+ length--;
+ } while (length != 0);
+ }
+ }
+ return TRUE;
+}
+
+tvbuff_t *
+tvb_uncompress_lz77huff(tvbuff_t *tvb,
+ const int offset,
+ int input_size)
+{
+ volatile gboolean ok;
+ wmem_allocator_t *pool;
+ wmem_array_t *obuf;
+ tvbuff_t *out;
+ struct input input = {
+ .tvb = tvb,
+ .offset = offset,
+ .size = input_size
+ };
+
+ pool = wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE);
+ obuf = wmem_array_sized_new(pool, 1, input_size*2);
+
+ TRY {
+ ok = do_uncompress(&input, obuf);
+ } CATCH_ALL {
+ ok = FALSE;
+ }
+ ENDTRY;
+
+ if (ok) {
+ /*
+ * Cannot pass a tvb free callback that frees the wmem
+ * pool, so we make an extra copy that uses bare
+ * pointers. This could be optimized if tvb API had a
+ * free pool callback of some sort.
+ */
+ guint size = wmem_array_get_count(obuf);
+ guint8 *p = (guint8 *)g_malloc(size);
+ memcpy(p, wmem_array_get_raw(obuf), size);
+ out = tvb_new_real_data(p, size, size);
+ tvb_set_free_cb(out, g_free);
+ } else {
+ out = NULL;
+ }
+
+ wmem_destroy_allocator(pool);
+
+ return out;
+}
+
+tvbuff_t *
+tvb_child_uncompress_lz77huff(tvbuff_t *parent, tvbuff_t *tvb, const int offset, int in_size)
+{
+ tvbuff_t *new_tvb = tvb_uncompress_lz77huff(tvb, offset, in_size);
+ if (new_tvb)
+ tvb_set_child_real_data_tvbuff(parent, new_tvb);
+ return new_tvb;
+}
+
+/*
+ * Editor modelines - https://www.wireshark.org/tools/modelines.html
+ *
+ * Local variables:
+ * c-basic-offset: 8
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vi: set shiftwidth=8 tabstop=8 noexpandtab:
+ * :indentSize=8:tabSize=8:noTabs=false:
+ */