From cf178685aca107aa37c748de11da01562e78c46c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 15 Jun 2024 11:41:34 +0200 Subject: Adding upstream version 5.6.2. Signed-off-by: Daniel Baumann --- src/liblzma/lz/Makefile.inc | 6 +- src/liblzma/lz/lz_decoder.c | 48 ++++++++++---- src/liblzma/lz/lz_decoder.h | 116 +++++++++++++++++++-------------- src/liblzma/lz/lz_encoder.c | 13 ++-- src/liblzma/lz/lz_encoder.h | 21 ++++-- src/liblzma/lz/lz_encoder_hash.h | 5 +- src/liblzma/lz/lz_encoder_hash_table.h | 4 +- src/liblzma/lz/lz_encoder_mf.c | 5 +- 8 files changed, 130 insertions(+), 88 deletions(-) (limited to 'src/liblzma/lz') diff --git a/src/liblzma/lz/Makefile.inc b/src/liblzma/lz/Makefile.inc index 75742a8..15235d7 100644 --- a/src/liblzma/lz/Makefile.inc +++ b/src/liblzma/lz/Makefile.inc @@ -1,9 +1,5 @@ -## +## SPDX-License-Identifier: 0BSD ## Author: Lasse Collin -## -## This file has been put into the public domain. -## You can do whatever you want with this file. -## if COND_ENCODER_LZ liblzma_la_SOURCES += \ diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c index 06c95c1..92913f2 100644 --- a/src/liblzma/lz/lz_decoder.c +++ b/src/liblzma/lz/lz_decoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lz_decoder.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// // liblzma supports multiple LZ77-based filters. The LZ part is shared @@ -54,9 +53,10 @@ typedef struct { static void lz_decoder_reset(lzma_coder *coder) { - coder->dict.pos = 0; + coder->dict.pos = 2 * LZ_DICT_REPEAT_MAX; coder->dict.full = 0; - coder->dict.buf[coder->dict.size - 1] = '\0'; + coder->dict.buf[2 * LZ_DICT_REPEAT_MAX - 1] = '\0'; + coder->dict.has_wrapped = false; coder->dict.need_reset = false; return; } @@ -70,8 +70,15 @@ decode_buffer(lzma_coder *coder, { while (true) { // Wrap the dictionary if needed. - if (coder->dict.pos == coder->dict.size) - coder->dict.pos = 0; + if (coder->dict.pos == coder->dict.size) { + // See the comment of #define LZ_DICT_REPEAT_MAX. + coder->dict.pos = LZ_DICT_REPEAT_MAX; + coder->dict.has_wrapped = true; + memcpy(coder->dict.buf, coder->dict.buf + + coder->dict.size + - LZ_DICT_REPEAT_MAX, + LZ_DICT_REPEAT_MAX); + } // Store the current dictionary position. It is needed to know // where to start copying to the out[] buffer. @@ -253,21 +260,31 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, // dictionary to the output buffer, since applications are // recommended to give aligned buffers to liblzma. // + // Reserve 2 * LZ_DICT_REPEAT_MAX bytes of extra space which is + // needed for alloc_size. + // // Avoid integer overflow. - if (lz_options.dict_size > SIZE_MAX - 15) + if (lz_options.dict_size > SIZE_MAX - 15 - 2 * LZ_DICT_REPEAT_MAX) return LZMA_MEM_ERROR; lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15)); + // Reserve extra space as explained in the comment + // of #define LZ_DICT_REPEAT_MAX. + const size_t alloc_size + = lz_options.dict_size + 2 * LZ_DICT_REPEAT_MAX; + // Allocate and initialize the dictionary. - if (coder->dict.size != lz_options.dict_size) { + if (coder->dict.size != alloc_size) { lzma_free(coder->dict.buf, allocator); - coder->dict.buf - = lzma_alloc(lz_options.dict_size, allocator); + coder->dict.buf = lzma_alloc(alloc_size, allocator); if (coder->dict.buf == NULL) return LZMA_MEM_ERROR; - coder->dict.size = lz_options.dict_size; + // NOTE: Yes, alloc_size, not lz_options.dict_size. The way + // coder->dict.full is updated will take care that we will + // still reject distances larger than lz_options.dict_size. + coder->dict.size = alloc_size; } lz_decoder_reset(next->coder); @@ -280,9 +297,12 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator, const size_t copy_size = my_min(lz_options.preset_dict_size, lz_options.dict_size); const size_t offset = lz_options.preset_dict_size - copy_size; - memcpy(coder->dict.buf, lz_options.preset_dict + offset, + memcpy(coder->dict.buf + coder->dict.pos, + lz_options.preset_dict + offset, copy_size); - coder->dict.pos = copy_size; + + // dict.pos isn't zero after lz_decoder_reset(). + coder->dict.pos += copy_size; coder->dict.full = copy_size; } diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h index ad80d4d..cb61b6e 100644 --- a/src/liblzma/lz/lz_decoder.h +++ b/src/liblzma/lz/lz_decoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lz_decoder.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZ_DECODER_H @@ -17,10 +16,28 @@ #include "common.h" +/// Maximum length of a match rounded up to a nice power of 2 which is +/// a good size for aligned memcpy(). The allocated dictionary buffer will +/// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size: +/// +/// (1) Every time the decoder reaches the end of the dictionary buffer, +/// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning. +/// This way dict_repeat() will only need to copy from one place, +/// never from both the end and beginning of the buffer. +/// +/// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between +/// the oldest byte still in the dictionary and the current write +/// position. This way dict_repeat(dict, dict->size - 1, &len) +/// won't need memmove() as the copying cannot overlap. +/// +/// Note that memcpy() still cannot be used if distance < len. +/// +/// LZMA's longest match length is 273 so pick a multiple of 16 above that. +#define LZ_DICT_REPEAT_MAX 288 + + typedef struct { - /// Pointer to the dictionary buffer. It can be an allocated buffer - /// internal to liblzma, or it can a be a buffer given by the - /// application when in single-call mode (not implemented yet). + /// Pointer to the dictionary buffer. uint8_t *buf; /// Write position in dictionary. The next byte will be written to @@ -35,9 +52,16 @@ typedef struct { /// Write limit size_t limit; - /// Size of the dictionary + /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes + /// larger than the actual dictionary size. This is enforced by + /// how the value for "full" is set; it can be at most + /// "size - 2 * LZ_DICT_REPEAT_MAX". size_t size; + /// True once the dictionary has become full and the writing position + /// has been wrapped in decode_buffer() in lz_decoder.c. + bool has_wrapped; + /// True when dictionary should be reset before decoding more data. bool need_reset; @@ -103,7 +127,16 @@ static inline uint8_t dict_get(const lzma_dict *const dict, const uint32_t distance) { return dict->buf[dict->pos - distance - 1 - + (distance < dict->pos ? 0 : dict->size)]; + + (distance < dict->pos + ? 0 : dict->size - LZ_DICT_REPEAT_MAX)]; +} + + +/// Optimized version of dict_get(dict, 0) +static inline uint8_t +dict_get0(const lzma_dict *const dict) +{ + return dict->buf[dict->pos - 1]; } @@ -132,68 +165,51 @@ dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len) uint32_t left = my_min(dict_avail, *len); *len -= left; + size_t back = dict->pos - distance - 1; + if (distance >= dict->pos) + back += dict->size - LZ_DICT_REPEAT_MAX; + // Repeat a block of data from the history. Because memcpy() is faster // than copying byte by byte in a loop, the copying process gets split - // into three cases. + // into two cases. if (distance < left) { // Source and target areas overlap, thus we can't use // memcpy() nor even memmove() safely. do { - dict->buf[dict->pos] = dict_get(dict, distance); - ++dict->pos; + dict->buf[dict->pos++] = dict->buf[back++]; } while (--left > 0); - - } else if (distance < dict->pos) { - // The easiest and fastest case - memcpy(dict->buf + dict->pos, - dict->buf + dict->pos - distance - 1, - left); - dict->pos += left; - } else { - // The bigger the dictionary, the more rare this - // case occurs. We need to "wrap" the dict, thus - // we might need two memcpy() to copy all the data. - assert(dict->full == dict->size); - const uint32_t copy_pos - = dict->pos - distance - 1 + dict->size; - uint32_t copy_size = dict->size - copy_pos; - - if (copy_size < left) { - memmove(dict->buf + dict->pos, dict->buf + copy_pos, - copy_size); - dict->pos += copy_size; - copy_size = left - copy_size; - memcpy(dict->buf + dict->pos, dict->buf, copy_size); - dict->pos += copy_size; - } else { - memmove(dict->buf + dict->pos, dict->buf + copy_pos, - left); - dict->pos += left; - } + memcpy(dict->buf + dict->pos, dict->buf + back, left); + dict->pos += left; } // Update how full the dictionary is. - if (dict->full < dict->pos) - dict->full = dict->pos; + if (!dict->has_wrapped) + dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX; - return unlikely(*len != 0); + return *len != 0; +} + + +static inline void +dict_put(lzma_dict *dict, uint8_t byte) +{ + dict->buf[dict->pos++] = byte; + + if (!dict->has_wrapped) + dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX; } /// Puts one byte into the dictionary. Returns true if the dictionary was /// already full and the byte couldn't be added. static inline bool -dict_put(lzma_dict *dict, uint8_t byte) +dict_put_safe(lzma_dict *dict, uint8_t byte) { if (unlikely(dict->pos == dict->limit)) return true; - dict->buf[dict->pos++] = byte; - - if (dict->pos > dict->full) - dict->full = dict->pos; - + dict_put(dict, byte); return false; } @@ -217,8 +233,8 @@ dict_write(lzma_dict *restrict dict, const uint8_t *restrict in, *left -= lzma_bufcpy(in, in_pos, in_size, dict->buf, &dict->pos, dict->limit); - if (dict->pos > dict->full) - dict->full = dict->pos; + if (!dict->has_wrapped) + dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX; return; } diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c index 5489085..4af23e1 100644 --- a/src/liblzma/lz/lz_encoder.c +++ b/src/liblzma/lz/lz_encoder.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lz_encoder.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "lz_encoder.h" @@ -196,9 +195,7 @@ lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator, // For now, the dictionary size is limited to 1.5 GiB. This may grow // in the future if needed, but it needs a little more work than just // changing this check. - if (lz_options->dict_size < LZMA_DICT_SIZE_MIN - || lz_options->dict_size - > (UINT32_C(1) << 30) + (UINT32_C(1) << 29) + if (!IS_ENC_DICT_SIZE_VALID(lz_options->dict_size) || lz_options->nice_len > lz_options->match_len_max) return true; @@ -549,7 +546,7 @@ lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, lzma_lz_options *lz_options)) { #if defined(HAVE_SMALL) && !defined(HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR) - // We need that the CRC32 table has been initialized. + // The CRC32 table must be initialized. lzma_crc32_init(); #endif @@ -569,6 +566,8 @@ lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator, coder->lz.coder = NULL; coder->lz.code = NULL; coder->lz.end = NULL; + coder->lz.options_update = NULL; + coder->lz.set_out_limit = NULL; // mf.size is initialized to silence Valgrind // when used on optimized binaries (GCC may reorder diff --git a/src/liblzma/lz/lz_encoder.h b/src/liblzma/lz/lz_encoder.h index ffcba02..eb197c6 100644 --- a/src/liblzma/lz/lz_encoder.h +++ b/src/liblzma/lz/lz_encoder.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lz_encoder.h @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZ_ENCODER_H @@ -17,6 +16,14 @@ #include "common.h" +// For now, the dictionary size is limited to 1.5 GiB. This may grow +// in the future if needed, but it needs a little more work than just +// changing this check. +#define IS_ENC_DICT_SIZE_VALID(size) \ + ((size) >= LZMA_DICT_SIZE_MIN \ + && (size) <= (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) + + /// A table of these is used by the LZ-based encoder to hold /// the length-distance pairs found by the match finder. typedef struct { @@ -153,9 +160,13 @@ typedef struct { /// Maximum search depth uint32_t depth; - /// TODO: Comment + /// Initial dictionary for the match finder to search. const uint8_t *preset_dict; + /// If the preset dictionary is NULL, this value is ignored. + /// Otherwise this member must indicate the preset dictionary's + /// buffer size. If this size is larger than dict_size, then only + /// the dict_size sized tail of the preset_dict will be used. uint32_t preset_dict_size; } lzma_lz_options; @@ -217,7 +228,7 @@ typedef struct { // 3. The literals and matches are encoded using e.g. LZMA. // // The bytes that have been ran through the match finder, but not encoded yet, -// are called `read ahead'. +// are called 'read ahead'. /// Get how many bytes the match finder hashes in its initial step. diff --git a/src/liblzma/lz/lz_encoder_hash.h b/src/liblzma/lz/lz_encoder_hash.h index 4d9971a..8ace82b 100644 --- a/src/liblzma/lz/lz_encoder_hash.h +++ b/src/liblzma/lz/lz_encoder_hash.h @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lz_encoder_hash.h @@ -5,9 +7,6 @@ // // Author: Igor Pavlov // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #ifndef LZMA_LZ_ENCODER_HASH_H diff --git a/src/liblzma/lz/lz_encoder_hash_table.h b/src/liblzma/lz/lz_encoder_hash_table.h index 8c51717..2b3a60e 100644 --- a/src/liblzma/lz/lz_encoder_hash_table.h +++ b/src/liblzma/lz/lz_encoder_hash_table.h @@ -1,4 +1,6 @@ -/* This file has been automatically generated by crc32_tablegen.c. */ +// SPDX-License-Identifier: 0BSD + +// This file has been generated by crc32_tablegen.c. const uint32_t lzma_lz_hash_table[256] = { 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, diff --git a/src/liblzma/lz/lz_encoder_mf.c b/src/liblzma/lz/lz_encoder_mf.c index 1fdc2d7..557c261 100644 --- a/src/liblzma/lz/lz_encoder_mf.c +++ b/src/liblzma/lz/lz_encoder_mf.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: 0BSD + /////////////////////////////////////////////////////////////////////////////// // /// \file lz_encoder_mf.c @@ -6,9 +8,6 @@ // Authors: Igor Pavlov // Lasse Collin // -// This file has been put into the public domain. -// You can do whatever you want with this file. -// /////////////////////////////////////////////////////////////////////////////// #include "lz_encoder.h" -- cgit v1.2.3