summaryrefslogtreecommitdiffstats
path: root/lib/compression
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-05 17:47:29 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-05 17:47:29 +0000
commit4f5791ebd03eaec1c7da0865a383175b05102712 (patch)
tree8ce7b00f7a76baa386372422adebbe64510812d4 /lib/compression
parentInitial commit. (diff)
downloadsamba-4f5791ebd03eaec1c7da0865a383175b05102712.tar.xz
samba-4f5791ebd03eaec1c7da0865a383175b05102712.zip
Adding upstream version 2:4.17.12+dfsg.upstream/2%4.17.12+dfsgupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/compression')
-rw-r--r--lib/compression/lzxpress.c347
-rw-r--r--lib/compression/lzxpress.h50
-rw-r--r--lib/compression/testsuite.c483
-rw-r--r--lib/compression/wscript_build6
4 files changed, 886 insertions, 0 deletions
diff --git a/lib/compression/lzxpress.c b/lib/compression/lzxpress.c
new file mode 100644
index 0000000..6b2aeef
--- /dev/null
+++ b/lib/compression/lzxpress.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (C) Matthieu Suiche 2008
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include "replace.h"
+#include "lzxpress.h"
+#include "../lib/util/byteorder.h"
+
+
+#define __CHECK_BYTES(__size, __index, __needed) do { \
+ if (unlikely(__index >= __size)) { \
+ return -1; \
+ } else { \
+ uint32_t __avail = __size - __index; \
+ if (unlikely(__needed > __avail)) { \
+ return -1; \
+ } \
+ } \
+} while(0)
+
+#define CHECK_INPUT_BYTES(__needed) \
+ __CHECK_BYTES(uncompressed_size, uncompressed_pos, __needed)
+#define CHECK_OUTPUT_BYTES(__needed) \
+ __CHECK_BYTES(max_compressed_size, compressed_pos, __needed)
+
+ssize_t lzxpress_compress(const uint8_t *uncompressed,
+ uint32_t uncompressed_size,
+ uint8_t *compressed,
+ uint32_t max_compressed_size)
+{
+ /*
+ * This is the algorithm in [MS-XCA] 2.3 "Plain LZ77 Compression".
+ *
+ * It avoids Huffman encoding by including literal bytes inline when a
+ * match is not found. Every so often it includes a uint32 bit map
+ * flagging which positions contain matches and which contain
+ * literals. The encoding of matches is of variable size, depending on
+ * the match length; they are always at least 16 bits long, and can
+ * implicitly use unused half-bytes from earlier in the stream.
+ */
+ uint32_t uncompressed_pos, compressed_pos;
+ uint32_t indic;
+ uint32_t indic_pos;
+ uint32_t indic_bit, nibble_index;
+
+ if (!uncompressed_size) {
+ return 0;
+ }
+
+ uncompressed_pos = 0;
+ compressed_pos = 0;
+ indic = 0;
+ CHECK_OUTPUT_BYTES(sizeof(uint32_t));
+ PUSH_LE_U32(compressed, compressed_pos, 0);
+ compressed_pos += sizeof(uint32_t);
+ indic_pos = 0;
+
+ indic_bit = 0;
+ nibble_index = 0;
+
+ while ((uncompressed_pos < uncompressed_size) &&
+ (compressed_pos < max_compressed_size)) {
+ bool found = false;
+
+ uint32_t best_len = 2;
+ uint32_t best_offset = 0;
+
+ int32_t offset;
+
+ const uint32_t max_offset = MIN(0x2000, uncompressed_pos);
+ /* maximum len we can encode into metadata */
+ const uint32_t max_len = MIN(0xFFFF + 3, uncompressed_size - uncompressed_pos);
+
+ /* search for the longest match in the window for the lookahead buffer */
+ for (offset = 1; (uint32_t)offset <= max_offset; offset++) {
+ uint32_t len;
+
+ for (len = 0;
+ (len < max_len) && (uncompressed[uncompressed_pos + len] ==
+ uncompressed[uncompressed_pos + len - offset]);
+ len++);
+
+ /*
+ * We check if len is better than the value found before, including the
+ * sequence of identical bytes
+ */
+ if (len > best_len) {
+ found = true;
+ best_len = len;
+ best_offset = offset;
+ if (best_len == max_len) {
+ /* We're not going to do better than this */
+ break;
+ }
+ }
+ }
+
+ if (!found) {
+ /*
+ * This is going to literal byte, which we flag by
+ * setting a bit in an indicator field somewhere
+ * earlier in the stream.
+ */
+ CHECK_INPUT_BYTES(sizeof(uint8_t));
+ CHECK_OUTPUT_BYTES(sizeof(uint8_t));
+ compressed[compressed_pos++] = uncompressed[uncompressed_pos++];
+
+ indic <<= 1;
+ indic_bit += 1;
+
+ if (indic_bit == 32) {
+ PUSH_LE_U32(compressed, indic_pos, indic);
+ indic_bit = 0;
+ CHECK_OUTPUT_BYTES(sizeof(uint32_t));
+ indic_pos = compressed_pos;
+ compressed_pos += sizeof(uint32_t);
+ }
+ } else {
+ uint32_t match_len = best_len;
+
+ uint16_t metadata;
+
+ match_len -= 3;
+ best_offset -= 1;
+
+ /* Classical meta-data */
+ CHECK_OUTPUT_BYTES(sizeof(uint16_t));
+ metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7));
+ PUSH_LE_U16(compressed, compressed_pos, metadata);
+ compressed_pos += sizeof(uint16_t);
+
+ if (match_len >= 7) {
+ match_len -= 7;
+
+ if (!nibble_index) {
+ nibble_index = compressed_pos;
+
+ CHECK_OUTPUT_BYTES(sizeof(uint8_t));
+ compressed[nibble_index] = MIN(match_len, 15);
+ compressed_pos += sizeof(uint8_t);
+ } else {
+ compressed[nibble_index] |= MIN(match_len, 15) << 4;
+ nibble_index = 0;
+ }
+
+ if (match_len >= 15) {
+ match_len -= 15;
+
+ CHECK_OUTPUT_BYTES(sizeof(uint8_t));
+ compressed[compressed_pos] = MIN(match_len, 255);
+ compressed_pos += sizeof(uint8_t);
+
+ if (match_len >= 255) {
+ /* Additional match_len */
+
+ match_len += 7 + 15;
+
+ if (match_len < (1 << 16)) {
+ CHECK_OUTPUT_BYTES(sizeof(uint16_t));
+ PUSH_LE_U16(compressed, compressed_pos, match_len);
+ compressed_pos += sizeof(uint16_t);
+ } else {
+ CHECK_OUTPUT_BYTES(sizeof(uint16_t) + sizeof(uint32_t));
+ PUSH_LE_U16(compressed, compressed_pos, 0);
+ compressed_pos += sizeof(uint16_t);
+
+ PUSH_LE_U32(compressed, compressed_pos, match_len);
+ compressed_pos += sizeof(uint32_t);
+ }
+ }
+ }
+ }
+
+ indic = (indic << 1) | 1;
+ indic_bit += 1;
+
+ if (indic_bit == 32) {
+ PUSH_LE_U32(compressed, indic_pos, indic);
+ indic_bit = 0;
+ CHECK_OUTPUT_BYTES(sizeof(uint32_t));
+ indic_pos = compressed_pos;
+ compressed_pos += sizeof(uint32_t);
+ }
+
+ uncompressed_pos += best_len;
+ }
+ }
+
+ if (indic_bit != 0) {
+ indic <<= 32 - indic_bit;
+ }
+ indic |= UINT32_MAX >> indic_bit;
+ PUSH_LE_U32(compressed, indic_pos, indic);
+
+ return compressed_pos;
+}
+
+ssize_t lzxpress_decompress(const uint8_t *input,
+ uint32_t input_size,
+ uint8_t *output,
+ uint32_t max_output_size)
+{
+ /*
+ * This is the algorithm in [MS-XCA] 2.4 "Plain LZ77 Decompression
+ * Algorithm Details".
+ */
+ uint32_t output_index, input_index;
+ uint32_t indicator, indicator_bit;
+ uint32_t nibble_index;
+
+ if (input_size == 0) {
+ return 0;
+ }
+
+ output_index = 0;
+ input_index = 0;
+ indicator = 0;
+ indicator_bit = 0;
+ nibble_index = 0;
+
+#undef CHECK_INPUT_BYTES
+#define CHECK_INPUT_BYTES(__needed) \
+ __CHECK_BYTES(input_size, input_index, __needed)
+#undef CHECK_OUTPUT_BYTES
+#define CHECK_OUTPUT_BYTES(__needed) \
+ __CHECK_BYTES(max_output_size, output_index, __needed)
+
+ do {
+ if (indicator_bit == 0) {
+ CHECK_INPUT_BYTES(sizeof(uint32_t));
+ indicator = PULL_LE_U32(input, input_index);
+ input_index += sizeof(uint32_t);
+ if (input_index == input_size) {
+ /*
+ * The compressor left room for indicator
+ * flags for data that doesn't exist.
+ */
+ break;
+ }
+ indicator_bit = 32;
+ }
+ indicator_bit--;
+
+ /*
+ * check whether the bit specified by indicator_bit is set or not
+ * set in indicator. For example, if indicator_bit has value 4
+ * check whether the 4th bit of the value in indicator is set
+ */
+ if (((indicator >> indicator_bit) & 1) == 0) {
+ CHECK_INPUT_BYTES(sizeof(uint8_t));
+ CHECK_OUTPUT_BYTES(sizeof(uint8_t));
+ output[output_index] = input[input_index];
+ input_index += sizeof(uint8_t);
+ output_index += sizeof(uint8_t);
+ } else {
+ uint32_t length;
+ uint32_t offset;
+
+ CHECK_INPUT_BYTES(sizeof(uint16_t));
+ length = PULL_LE_U16(input, input_index);
+ input_index += sizeof(uint16_t);
+ offset = (length >> 3) + 1;
+ length &= 7;
+
+ if (length == 7) {
+ if (nibble_index == 0) {
+ CHECK_INPUT_BYTES(sizeof(uint8_t));
+ nibble_index = input_index;
+ length = input[input_index] & 0xf;
+ input_index += sizeof(uint8_t);
+ } else {
+ length = input[nibble_index] >> 4;
+ nibble_index = 0;
+ }
+
+ if (length == 15) {
+ CHECK_INPUT_BYTES(sizeof(uint8_t));
+ length = input[input_index];
+ input_index += sizeof(uint8_t);
+ if (length == 255) {
+ CHECK_INPUT_BYTES(sizeof(uint16_t));
+ length = PULL_LE_U16(input, input_index);
+ input_index += sizeof(uint16_t);
+ if (length == 0) {
+ CHECK_INPUT_BYTES(sizeof(uint32_t));
+ length = PULL_LE_U32(input, input_index);
+ input_index += sizeof(uint32_t);
+ }
+
+ if (length < (15 + 7)) {
+ return -1;
+ }
+ length -= (15 + 7);
+ }
+ length += 15;
+ }
+ length += 7;
+ }
+ length += 3;
+
+ if (length == 0) {
+ return -1;
+ }
+
+ for (; length > 0; --length) {
+ if (offset > output_index) {
+ return -1;
+ }
+ CHECK_OUTPUT_BYTES(sizeof(uint8_t));
+ output[output_index] = output[output_index - offset];
+ output_index += sizeof(uint8_t);
+ }
+ }
+ } while ((output_index < max_output_size) && (input_index < (input_size)));
+
+ return output_index;
+}
diff --git a/lib/compression/lzxpress.h b/lib/compression/lzxpress.h
new file mode 100644
index 0000000..df0ee59
--- /dev/null
+++ b/lib/compression/lzxpress.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) Matthieu Suiche 2008
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the author nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#ifndef _LZXPRESS_H
+#define _LZXPRESS_H
+
+#define XPRESS_BLOCK_SIZE 0x10000
+
+ssize_t lzxpress_compress(const uint8_t *uncompressed,
+ uint32_t uncompressed_size,
+ uint8_t *compressed,
+ uint32_t max_compressed_size);
+
+ssize_t lzxpress_decompress(const uint8_t *input,
+ uint32_t input_size,
+ uint8_t *output,
+ uint32_t max_output_size);
+
+#endif /* _LZXPRESS_H */
diff --git a/lib/compression/testsuite.c b/lib/compression/testsuite.c
new file mode 100644
index 0000000..d4dc41b
--- /dev/null
+++ b/lib/compression/testsuite.c
@@ -0,0 +1,483 @@
+/*
+ Unix SMB/CIFS implementation.
+ test suite for the compression functions
+
+ Copyright (C) Jelmer Vernooij 2007
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "includes.h"
+#include "torture/torture.h"
+#include "torture/local/proto.h"
+#include "talloc.h"
+#include "lzxpress.h"
+#include "lib/util/base64.h"
+
+/* Tests based on [MS-XCA] 3.1 Examples */
+static bool test_msft_data1(
+ struct torture_context *test
+)
+{
+ TALLOC_CTX *tmp_ctx = talloc_new(test);
+
+ const char *fixed_data = "abcdefghijklmnopqrstuvwxyz";
+ const uint8_t fixed_out[] = {
+ 0x3f, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64,
+ 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
+ 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
+ 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a };
+
+ ssize_t c_size;
+ uint8_t *out, *out2;
+
+ out = talloc_size(tmp_ctx, 2048);
+ memset(out, 0x42, talloc_get_size(out));
+
+ torture_comment(test, "lzxpress fixed compression\n");
+ c_size = lzxpress_compress((const uint8_t *)fixed_data,
+ strlen(fixed_data),
+ out,
+ talloc_get_size(out));
+
+ torture_assert_int_equal(test, c_size, sizeof(fixed_out),
+ "fixed lzxpress_compress size");
+ torture_assert_mem_equal(test, out, fixed_out, c_size,
+ "fixed lzxpress_compress data");
+
+ torture_comment(test, "lzxpress fixed decompression\n");
+ out2 = talloc_size(tmp_ctx, strlen(fixed_data));
+ c_size = lzxpress_decompress(out,
+ sizeof(fixed_out),
+ out2,
+ talloc_get_size(out2));
+
+ torture_assert_int_equal(test, c_size, strlen(fixed_data),
+ "fixed lzxpress_decompress size");
+ torture_assert_mem_equal(test, out2, fixed_data, c_size,
+ "fixed lzxpress_decompress data");
+
+ talloc_free(tmp_ctx);
+ return true;
+}
+
+
+static bool test_msft_data2(
+ struct torture_context *test
+)
+{
+ TALLOC_CTX *tmp_ctx = talloc_new(test);
+
+ const char *fixed_data =
+ "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
+ "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
+ "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
+ "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
+ "abcabcabcabcabcabcabcabc";
+ const uint8_t fixed_out[] = {
+ 0xff, 0xff, 0xff, 0x1f, 0x61, 0x62, 0x63, 0x17,
+ 0x00, 0x0f, 0xff, 0x26, 0x01};
+
+ ssize_t c_size;
+ uint8_t *out, *out2;
+
+ out = talloc_size(tmp_ctx, 2048);
+ memset(out, 0x42, talloc_get_size(out));
+
+ torture_comment(test, "lzxpress fixed compression\n");
+ c_size = lzxpress_compress((const uint8_t *)fixed_data,
+ strlen(fixed_data),
+ out,
+ talloc_get_size(out));
+
+ torture_assert_int_equal(test, c_size, sizeof(fixed_out),
+ "fixed lzxpress_compress size");
+ torture_assert_mem_equal(test, out, fixed_out, c_size,
+ "fixed lzxpress_compress data");
+
+ torture_comment(test, "lzxpress fixed decompression\n");
+ out2 = talloc_size(tmp_ctx, strlen(fixed_data));
+ c_size = lzxpress_decompress(out,
+ sizeof(fixed_out),
+ out2,
+ talloc_get_size(out2));
+
+ torture_comment(test, "out2: %.*s\n", (int)c_size, (char *)out2);
+
+ torture_assert_int_equal(test, c_size, strlen(fixed_data),
+ "fixed lzxpress_decompress size");
+ torture_assert_mem_equal(test, out2, fixed_data, c_size,
+ "fixed lzxpress_decompress data");
+
+ talloc_free(tmp_ctx);
+ return true;
+}
+
+/*
+ test lzxpress
+ */
+static bool test_lzxpress(struct torture_context *test)
+{
+ TALLOC_CTX *tmp_ctx = talloc_new(test);
+ const char *fixed_data = "this is a test. and this is a test too";
+ const uint8_t fixed_out[] = {
+ 0xff, 0x21, 0x00, 0x04, 0x74, 0x68, 0x69, 0x73,
+ 0x20, 0x10, 0x00, 0x61, 0x20, 0x74, 0x65, 0x73,
+ 0x74, 0x2E, 0x20, 0x61, 0x6E, 0x64, 0x20, 0x9F,
+ 0x00, 0x04, 0x20, 0x74, 0x6F, 0x6F };
+
+ const uint8_t fixed_out_old_version[] = {
+ 0x00, 0x20, 0x00, 0x04, 0x74, 0x68, 0x69, 0x73,
+ 0x20, 0x10, 0x00, 0x61, 0x20, 0x74, 0x65, 0x73,
+ 0x74, 0x2E, 0x20, 0x61, 0x6E, 0x64, 0x20, 0x9F,
+ 0x00, 0x04, 0x20, 0x74, 0x6F, 0x6F, 0x00, 0x00,
+ 0x00, 0x00 };
+
+ ssize_t c_size;
+ uint8_t *out, *out2, *out3;
+
+ out = talloc_size(tmp_ctx, 2048);
+ memset(out, 0x42, talloc_get_size(out));
+
+ torture_comment(test, "lzxpress fixed compression\n");
+ c_size = lzxpress_compress((const uint8_t *)fixed_data,
+ strlen(fixed_data),
+ out,
+ talloc_get_size(out));
+
+ torture_assert_int_equal(test, c_size, sizeof(fixed_out),
+ "fixed lzxpress_compress size");
+ torture_assert_mem_equal(test, out, fixed_out, c_size,
+ "fixed lzxpress_compress data");
+
+ torture_comment(test, "lzxpress fixed decompression\n");
+ out2 = talloc_size(tmp_ctx, strlen(fixed_data));
+ c_size = lzxpress_decompress(out,
+ sizeof(fixed_out),
+ out2,
+ talloc_get_size(out2));
+
+ torture_assert_int_equal(test, c_size, strlen(fixed_data),
+ "fixed lzxpress_decompress size");
+ torture_assert_mem_equal(test, out2, fixed_data, c_size,
+ "fixed lzxpress_decompress data");
+
+
+ torture_comment(test, "lzxpress fixed decompression (old data)\n");
+ out3 = talloc_size(tmp_ctx, strlen(fixed_data));
+ c_size = lzxpress_decompress(fixed_out_old_version,
+ sizeof(fixed_out_old_version),
+ out3,
+ talloc_get_size(out3));
+
+ torture_assert_int_equal(test, c_size, strlen(fixed_data),
+ "fixed lzxpress_decompress size");
+ torture_assert_mem_equal(test, out3, fixed_data, c_size,
+ "fixed lzxpress_decompress data");
+
+ talloc_free(tmp_ctx);
+ return true;
+}
+
+static bool test_lzxpress2(struct torture_context *test)
+{
+ /*
+ * Use two matches, separated by a literal, and each with a length
+ * greater than 10, to test the use of nibble_index. Both length values
+ * (less ten) should be stored as adjacent nibbles to form the 0x21
+ * byte.
+ */
+
+ TALLOC_CTX *tmp_ctx = talloc_new(test);
+ const char *fixed_data = "aaaaaaaaaaaabaaaaaaaaaaaa";
+ const uint8_t fixed_out[] = {
+ 0xff, 0xff, 0xff, 0x5f, 0x61, 0x07, 0x00, 0x21,
+ 0x62, 0x67, 0x00};
+
+ ssize_t c_size;
+ uint8_t *out, *out2;
+
+ out = talloc_size(tmp_ctx, 2048);
+ memset(out, 0x42, talloc_get_size(out));
+
+ torture_comment(test, "lzxpress fixed compression\n");
+ c_size = lzxpress_compress((const uint8_t *)fixed_data,
+ strlen(fixed_data),
+ out,
+ talloc_get_size(out));
+
+ torture_assert_int_equal(test, c_size, sizeof(fixed_out),
+ "fixed lzxpress_compress size");
+ torture_assert_mem_equal(test, out, fixed_out, c_size,
+ "fixed lzxpress_compress data");
+
+ torture_comment(test, "lzxpress fixed decompression\n");
+ out2 = talloc_size(tmp_ctx, strlen(fixed_data));
+ c_size = lzxpress_decompress(out,
+ sizeof(fixed_out),
+ out2,
+ talloc_get_size(out2));
+
+ torture_assert_int_equal(test, c_size, strlen(fixed_data),
+ "fixed lzxpress_decompress size");
+ torture_assert_mem_equal(test, out2, fixed_data, c_size,
+ "fixed lzxpress_decompress data");
+
+ talloc_free(tmp_ctx);
+ return true;
+}
+
+static bool test_lzxpress3(struct torture_context *test)
+{
+ /*
+ * Use a series of 31 literals, followed by a single minimum-length
+ * match (and a terminating literal), to test setting indic_pos when the
+ * 32-bit flags value overflows after a match.
+ */
+
+ TALLOC_CTX *tmp_ctx = talloc_new(test);
+ const char *fixed_data = "abcdefghijklmnopqrstuvwxyz01234abca";
+ const uint8_t fixed_out[] = {
+ 0x01, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64,
+ 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
+ 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
+ 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x30, 0x31,
+ 0x32, 0x33, 0x34, 0xf0, 0x00, 0xff, 0xff, 0xff,
+ 0x7f, 0x61};
+
+ ssize_t c_size;
+ uint8_t *out, *out2;
+
+ out = talloc_size(tmp_ctx, 2048);
+ memset(out, 0x42, talloc_get_size(out));
+
+ torture_comment(test, "lzxpress fixed compression\n");
+ c_size = lzxpress_compress((const uint8_t *)fixed_data,
+ strlen(fixed_data),
+ out,
+ talloc_get_size(out));
+
+ torture_assert_int_equal(test, c_size, sizeof(fixed_out),
+ "fixed lzxpress_compress size");
+ torture_assert_mem_equal(test, out, fixed_out, c_size,
+ "fixed lzxpress_compress data");
+
+ torture_comment(test, "lzxpress fixed decompression\n");
+ out2 = talloc_size(tmp_ctx, strlen(fixed_data));
+ c_size = lzxpress_decompress(out,
+ sizeof(fixed_out),
+ out2,
+ talloc_get_size(out2));
+
+ torture_assert_int_equal(test, c_size, strlen(fixed_data),
+ "fixed lzxpress_decompress size");
+ torture_assert_mem_equal(test, out2, fixed_data, c_size,
+ "fixed lzxpress_decompress data");
+
+ talloc_free(tmp_ctx);
+ return true;
+}
+
+static bool test_lzxpress4(struct torture_context *test)
+{
+ /*
+ * Use a series of 31 literals, followed by a single minimum-length
+ * match, to test that the final set of 32-bit flags is written
+ * correctly when it is empty.
+ */
+
+ TALLOC_CTX *tmp_ctx = talloc_new(test);
+ const char *fixed_data = "abcdefghijklmnopqrstuvwxyz01234abc";
+ const uint8_t fixed_out[] = {
+ 0x01, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64,
+ 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
+ 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
+ 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x30, 0x31,
+ 0x32, 0x33, 0x34, 0xf0, 0x00, 0xff, 0xff, 0xff,
+ 0xff};
+
+ ssize_t c_size;
+ uint8_t *out, *out2;
+
+ out = talloc_size(tmp_ctx, 2048);
+ memset(out, 0x42, talloc_get_size(out));
+
+ torture_comment(test, "lzxpress fixed compression\n");
+ c_size = lzxpress_compress((const uint8_t *)fixed_data,
+ strlen(fixed_data),
+ out,
+ talloc_get_size(out));
+
+ torture_assert_int_equal(test, c_size, sizeof(fixed_out),
+ "fixed lzxpress_compress size");
+ torture_assert_mem_equal(test, out, fixed_out, c_size,
+ "fixed lzxpress_compress data");
+
+ torture_comment(test, "lzxpress fixed decompression\n");
+ out2 = talloc_size(tmp_ctx, strlen(fixed_data));
+ c_size = lzxpress_decompress(out,
+ sizeof(fixed_out),
+ out2,
+ talloc_get_size(out2));
+
+ torture_assert_int_equal(test, c_size, strlen(fixed_data),
+ "fixed lzxpress_decompress size");
+ torture_assert_mem_equal(test, out2, fixed_data, c_size,
+ "fixed lzxpress_decompress data");
+
+ talloc_free(tmp_ctx);
+ return true;
+}
+
+
+static bool test_lzxpress_many_zeros(struct torture_context *test)
+{
+ /*
+ * Repeated values (zero is convenient but not special) will lead to
+ * very long substring searches in compression, which can be very slow
+ * if we're not careful.
+ *
+ * This test makes a very loose assertion about how long it should
+ * take to compress a million zeros.
+ *
+ * Wall clock time *should* be < 0.1 seconds with the fix and around a
+ * minute without it. We try for CLOCK_THREAD_CPUTIME_ID which should
+ * filter out some noise on the machine, and set the threshold at 5
+ * seconds.
+ */
+
+ TALLOC_CTX *tmp_ctx = talloc_new(test);
+ const size_t N_ZEROS = 1000000;
+ const uint8_t *zeros = talloc_zero_size(tmp_ctx, N_ZEROS);
+ const ssize_t expected_c_size = 93;
+ ssize_t c_size;
+ uint8_t *comp, *decomp;
+ static struct timespec t_start, t_end;
+ uint64_t elapsed_ns;
+
+ if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_start) != 0) {
+ if (clock_gettime(CUSTOM_CLOCK_MONOTONIC, &t_start) != 0) {
+ clock_gettime(CLOCK_REALTIME, &t_start);
+ }
+ }
+
+ comp = talloc_zero_size(tmp_ctx, 2048);
+
+ c_size = lzxpress_compress(zeros,
+ N_ZEROS,
+ comp,
+ talloc_get_size(comp));
+
+ torture_assert_int_equal(test, c_size, expected_c_size,
+ "fixed lzxpress_compress size");
+
+ decomp = talloc_size(tmp_ctx, N_ZEROS * 2);
+ c_size = lzxpress_decompress(comp,
+ c_size,
+ decomp,
+ N_ZEROS * 2);
+
+ if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_end) != 0) {
+ if (clock_gettime(CUSTOM_CLOCK_MONOTONIC, &t_end) != 0) {
+ clock_gettime(CLOCK_REALTIME, &t_end);
+ }
+ }
+ elapsed_ns = (
+ (t_end.tv_sec - t_start.tv_sec) * 1000U * 1000U * 1000U) +
+ (t_end.tv_nsec - t_start.tv_nsec);
+ torture_comment(test, "round-trip time: %"PRIu64" ns\n", elapsed_ns);
+ torture_assert(test, elapsed_ns < 3 * 1000U * 1000U * 1000U,
+ "million zeros round trip tool > 3 seconds");
+ torture_assert_mem_equal(test, decomp, zeros, N_ZEROS,
+ "fixed lzxpress_decompress data");
+
+ talloc_free(tmp_ctx);
+ return true;
+}
+
+
+static bool test_lzxpress_round_trip(struct torture_context *test)
+{
+ /*
+ * Examples found using via fuzzing.
+ */
+ TALLOC_CTX *tmp_ctx = talloc_new(test);
+ size_t i;
+ struct b64_pair {
+ const char *uncompressed;
+ const char *compressed;
+ } pairs[] = {
+ { /* this results in a trailing flags block */
+ "AAICAmq/EKdP785YU2Ddh7d4vUtdlQyLeHV09LHpUBw=",
+ "AAAAAAACAgJqvxCnT+/OWFNg3Ye3eL1LXZUMi3h1dPSx6VAc/////w==",
+ },
+ { /* empty string compresses to empty string */
+ "", ""
+ },
+ };
+ const size_t alloc_size = 1000;
+ uint8_t *data = talloc_array(tmp_ctx, uint8_t, alloc_size);
+
+ for (i = 0; i < ARRAY_SIZE(pairs); i++) {
+ ssize_t len;
+ DATA_BLOB uncomp = base64_decode_data_blob_talloc(
+ tmp_ctx,
+ pairs[i].uncompressed);
+ DATA_BLOB comp = base64_decode_data_blob_talloc(
+ tmp_ctx,
+ pairs[i].compressed);
+
+ len = lzxpress_compress(uncomp.data,
+ uncomp.length,
+ data,
+ alloc_size);
+
+ torture_assert_int_equal(test, len, comp.length,
+ "lzexpress compression size");
+
+ torture_assert_mem_equal(test, comp.data, data, len,
+ "lzxpress compression data");
+
+ len = lzxpress_decompress(comp.data,
+ comp.length,
+ data,
+ alloc_size);
+
+ torture_assert_int_equal(test, len, uncomp.length,
+ "lzexpress decompression size");
+
+ torture_assert_mem_equal(test, uncomp.data, data, len,
+ "lzxpress decompression data");
+ }
+ talloc_free(tmp_ctx);
+ return true;
+}
+
+
+struct torture_suite *torture_local_compression(TALLOC_CTX *mem_ctx)
+{
+ struct torture_suite *suite = torture_suite_create(mem_ctx, "compression");
+
+ torture_suite_add_simple_test(suite, "lzxpress", test_lzxpress);
+ torture_suite_add_simple_test(suite, "lzxpress_msft_data1", test_msft_data1);
+ torture_suite_add_simple_test(suite, "lzxpress_msft_data2", test_msft_data2);
+ torture_suite_add_simple_test(suite, "lzxpress2", test_lzxpress2);
+ torture_suite_add_simple_test(suite, "lzxpress3", test_lzxpress3);
+ torture_suite_add_simple_test(suite, "lzxpress4", test_lzxpress4);
+ torture_suite_add_simple_test(suite, "lzxpress_many_zeros",
+ test_lzxpress_many_zeros);
+ torture_suite_add_simple_test(suite, "lzxpress_round_trip",
+ test_lzxpress_round_trip);
+ return suite;
+}
diff --git a/lib/compression/wscript_build b/lib/compression/wscript_build
new file mode 100644
index 0000000..7ad5333
--- /dev/null
+++ b/lib/compression/wscript_build
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+bld.SAMBA_SUBSYSTEM('LZXPRESS',
+ deps='replace',
+ source='lzxpress.c'
+ )