summaryrefslogtreecommitdiffstats
path: root/src/liblzma/lz
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-15 09:41:34 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-15 09:41:34 +0000
commitcf178685aca107aa37c748de11da01562e78c46c (patch)
tree84d60b39c1744edcbdd4dbfc5026583914432dba /src/liblzma/lz
parentAdding upstream version 5.6.1+really5.4.5. (diff)
downloadxz-utils-upstream.tar.xz
xz-utils-upstream.zip
Adding upstream version 5.6.2.upstream/5.6.2upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/liblzma/lz')
-rw-r--r--src/liblzma/lz/Makefile.inc6
-rw-r--r--src/liblzma/lz/lz_decoder.c48
-rw-r--r--src/liblzma/lz/lz_decoder.h116
-rw-r--r--src/liblzma/lz/lz_encoder.c13
-rw-r--r--src/liblzma/lz/lz_encoder.h21
-rw-r--r--src/liblzma/lz/lz_encoder_hash.h5
-rw-r--r--src/liblzma/lz/lz_encoder_hash_table.h4
-rw-r--r--src/liblzma/lz/lz_encoder_mf.c5
8 files changed, 130 insertions, 88 deletions
diff --git a/src/liblzma/lz/Makefile.inc b/src/liblzma/lz/Makefile.inc
index 75742a8..15235d7 100644
--- a/src/liblzma/lz/Makefile.inc
+++ b/src/liblzma/lz/Makefile.inc
@@ -1,9 +1,5 @@
-##
+## SPDX-License-Identifier: 0BSD
## Author: Lasse Collin
-##
-## This file has been put into the public domain.
-## You can do whatever you want with this file.
-##
if COND_ENCODER_LZ
liblzma_la_SOURCES += \
diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c
index 06c95c1..92913f2 100644
--- a/src/liblzma/lz/lz_decoder.c
+++ b/src/liblzma/lz/lz_decoder.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_decoder.c
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
// liblzma supports multiple LZ77-based filters. The LZ part is shared
@@ -54,9 +53,10 @@ typedef struct {
static void
lz_decoder_reset(lzma_coder *coder)
{
- coder->dict.pos = 0;
+ coder->dict.pos = 2 * LZ_DICT_REPEAT_MAX;
coder->dict.full = 0;
- coder->dict.buf[coder->dict.size - 1] = '\0';
+ coder->dict.buf[2 * LZ_DICT_REPEAT_MAX - 1] = '\0';
+ coder->dict.has_wrapped = false;
coder->dict.need_reset = false;
return;
}
@@ -70,8 +70,15 @@ decode_buffer(lzma_coder *coder,
{
while (true) {
// Wrap the dictionary if needed.
- if (coder->dict.pos == coder->dict.size)
- coder->dict.pos = 0;
+ if (coder->dict.pos == coder->dict.size) {
+ // See the comment of #define LZ_DICT_REPEAT_MAX.
+ coder->dict.pos = LZ_DICT_REPEAT_MAX;
+ coder->dict.has_wrapped = true;
+ memcpy(coder->dict.buf, coder->dict.buf
+ + coder->dict.size
+ - LZ_DICT_REPEAT_MAX,
+ LZ_DICT_REPEAT_MAX);
+ }
// Store the current dictionary position. It is needed to know
// where to start copying to the out[] buffer.
@@ -253,21 +260,31 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
// dictionary to the output buffer, since applications are
// recommended to give aligned buffers to liblzma.
//
+ // Reserve 2 * LZ_DICT_REPEAT_MAX bytes of extra space which is
+ // needed for alloc_size.
+ //
// Avoid integer overflow.
- if (lz_options.dict_size > SIZE_MAX - 15)
+ if (lz_options.dict_size > SIZE_MAX - 15 - 2 * LZ_DICT_REPEAT_MAX)
return LZMA_MEM_ERROR;
lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));
+ // Reserve extra space as explained in the comment
+ // of #define LZ_DICT_REPEAT_MAX.
+ const size_t alloc_size
+ = lz_options.dict_size + 2 * LZ_DICT_REPEAT_MAX;
+
// Allocate and initialize the dictionary.
- if (coder->dict.size != lz_options.dict_size) {
+ if (coder->dict.size != alloc_size) {
lzma_free(coder->dict.buf, allocator);
- coder->dict.buf
- = lzma_alloc(lz_options.dict_size, allocator);
+ coder->dict.buf = lzma_alloc(alloc_size, allocator);
if (coder->dict.buf == NULL)
return LZMA_MEM_ERROR;
- coder->dict.size = lz_options.dict_size;
+ // NOTE: Yes, alloc_size, not lz_options.dict_size. The way
+ // coder->dict.full is updated will take care that we will
+ // still reject distances larger than lz_options.dict_size.
+ coder->dict.size = alloc_size;
}
lz_decoder_reset(next->coder);
@@ -280,9 +297,12 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
const size_t copy_size = my_min(lz_options.preset_dict_size,
lz_options.dict_size);
const size_t offset = lz_options.preset_dict_size - copy_size;
- memcpy(coder->dict.buf, lz_options.preset_dict + offset,
+ memcpy(coder->dict.buf + coder->dict.pos,
+ lz_options.preset_dict + offset,
copy_size);
- coder->dict.pos = copy_size;
+
+ // dict.pos isn't zero after lz_decoder_reset().
+ coder->dict.pos += copy_size;
coder->dict.full = copy_size;
}
diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h
index ad80d4d..cb61b6e 100644
--- a/src/liblzma/lz/lz_decoder.h
+++ b/src/liblzma/lz/lz_decoder.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_decoder.h
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZ_DECODER_H
@@ -17,10 +16,28 @@
#include "common.h"
+/// Maximum length of a match rounded up to a nice power of 2 which is
+/// a good size for aligned memcpy(). The allocated dictionary buffer will
+/// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size:
+///
+/// (1) Every time the decoder reaches the end of the dictionary buffer,
+/// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
+/// This way dict_repeat() will only need to copy from one place,
+/// never from both the end and beginning of the buffer.
+///
+/// (2) The other LZ_DICT_REPEAT_MAX bytes is kept as a buffer between
+/// the oldest byte still in the dictionary and the current write
+/// position. This way dict_repeat(dict, dict->size - 1, &len)
+/// won't need memmove() as the copying cannot overlap.
+///
+/// Note that memcpy() still cannot be used if distance < len.
+///
+/// LZMA's longest match length is 273 so pick a multiple of 16 above that.
+#define LZ_DICT_REPEAT_MAX 288
+
+
typedef struct {
- /// Pointer to the dictionary buffer. It can be an allocated buffer
- /// internal to liblzma, or it can a be a buffer given by the
- /// application when in single-call mode (not implemented yet).
+ /// Pointer to the dictionary buffer.
uint8_t *buf;
/// Write position in dictionary. The next byte will be written to
@@ -35,9 +52,16 @@ typedef struct {
/// Write limit
size_t limit;
- /// Size of the dictionary
+ /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes
+ /// larger than the actual dictionary size. This is enforced by
+ /// how the value for "full" is set; it can be at most
+ /// "size - 2 * LZ_DICT_REPEAT_MAX".
size_t size;
+ /// True once the dictionary has become full and the writing position
+ /// has been wrapped in decode_buffer() in lz_decoder.c.
+ bool has_wrapped;
+
/// True when dictionary should be reset before decoding more data.
bool need_reset;
@@ -103,7 +127,16 @@ static inline uint8_t
dict_get(const lzma_dict *const dict, const uint32_t distance)
{
return dict->buf[dict->pos - distance - 1
- + (distance < dict->pos ? 0 : dict->size)];
+ + (distance < dict->pos
+ ? 0 : dict->size - LZ_DICT_REPEAT_MAX)];
+}
+
+
+/// Optimized version of dict_get(dict, 0)
+static inline uint8_t
+dict_get0(const lzma_dict *const dict)
+{
+ return dict->buf[dict->pos - 1];
}
@@ -132,68 +165,51 @@ dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
uint32_t left = my_min(dict_avail, *len);
*len -= left;
+ size_t back = dict->pos - distance - 1;
+ if (distance >= dict->pos)
+ back += dict->size - LZ_DICT_REPEAT_MAX;
+
// Repeat a block of data from the history. Because memcpy() is faster
// than copying byte by byte in a loop, the copying process gets split
- // into three cases.
+ // into two cases.
if (distance < left) {
// Source and target areas overlap, thus we can't use
// memcpy() nor even memmove() safely.
do {
- dict->buf[dict->pos] = dict_get(dict, distance);
- ++dict->pos;
+ dict->buf[dict->pos++] = dict->buf[back++];
} while (--left > 0);
-
- } else if (distance < dict->pos) {
- // The easiest and fastest case
- memcpy(dict->buf + dict->pos,
- dict->buf + dict->pos - distance - 1,
- left);
- dict->pos += left;
-
} else {
- // The bigger the dictionary, the more rare this
- // case occurs. We need to "wrap" the dict, thus
- // we might need two memcpy() to copy all the data.
- assert(dict->full == dict->size);
- const uint32_t copy_pos
- = dict->pos - distance - 1 + dict->size;
- uint32_t copy_size = dict->size - copy_pos;
-
- if (copy_size < left) {
- memmove(dict->buf + dict->pos, dict->buf + copy_pos,
- copy_size);
- dict->pos += copy_size;
- copy_size = left - copy_size;
- memcpy(dict->buf + dict->pos, dict->buf, copy_size);
- dict->pos += copy_size;
- } else {
- memmove(dict->buf + dict->pos, dict->buf + copy_pos,
- left);
- dict->pos += left;
- }
+ memcpy(dict->buf + dict->pos, dict->buf + back, left);
+ dict->pos += left;
}
// Update how full the dictionary is.
- if (dict->full < dict->pos)
- dict->full = dict->pos;
+ if (!dict->has_wrapped)
+ dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
- return unlikely(*len != 0);
+ return *len != 0;
+}
+
+
+static inline void
+dict_put(lzma_dict *dict, uint8_t byte)
+{
+ dict->buf[dict->pos++] = byte;
+
+ if (!dict->has_wrapped)
+ dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
}
/// Puts one byte into the dictionary. Returns true if the dictionary was
/// already full and the byte couldn't be added.
static inline bool
-dict_put(lzma_dict *dict, uint8_t byte)
+dict_put_safe(lzma_dict *dict, uint8_t byte)
{
if (unlikely(dict->pos == dict->limit))
return true;
- dict->buf[dict->pos++] = byte;
-
- if (dict->pos > dict->full)
- dict->full = dict->pos;
-
+ dict_put(dict, byte);
return false;
}
@@ -217,8 +233,8 @@ dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
*left -= lzma_bufcpy(in, in_pos, in_size,
dict->buf, &dict->pos, dict->limit);
- if (dict->pos > dict->full)
- dict->full = dict->pos;
+ if (!dict->has_wrapped)
+ dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
return;
}
diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c
index 5489085..4af23e1 100644
--- a/src/liblzma/lz/lz_encoder.c
+++ b/src/liblzma/lz/lz_encoder.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_encoder.c
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#include "lz_encoder.h"
@@ -196,9 +195,7 @@ lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,
// For now, the dictionary size is limited to 1.5 GiB. This may grow
// in the future if needed, but it needs a little more work than just
// changing this check.
- if (lz_options->dict_size < LZMA_DICT_SIZE_MIN
- || lz_options->dict_size
- > (UINT32_C(1) << 30) + (UINT32_C(1) << 29)
+ if (!IS_ENC_DICT_SIZE_VALID(lz_options->dict_size)
|| lz_options->nice_len > lz_options->match_len_max)
return true;
@@ -549,7 +546,7 @@ lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
lzma_lz_options *lz_options))
{
#if defined(HAVE_SMALL) && !defined(HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR)
- // We need that the CRC32 table has been initialized.
+ // The CRC32 table must be initialized.
lzma_crc32_init();
#endif
@@ -569,6 +566,8 @@ lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
coder->lz.coder = NULL;
coder->lz.code = NULL;
coder->lz.end = NULL;
+ coder->lz.options_update = NULL;
+ coder->lz.set_out_limit = NULL;
// mf.size is initialized to silence Valgrind
// when used on optimized binaries (GCC may reorder
diff --git a/src/liblzma/lz/lz_encoder.h b/src/liblzma/lz/lz_encoder.h
index ffcba02..eb197c6 100644
--- a/src/liblzma/lz/lz_encoder.h
+++ b/src/liblzma/lz/lz_encoder.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_encoder.h
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZ_ENCODER_H
@@ -17,6 +16,14 @@
#include "common.h"
+// For now, the dictionary size is limited to 1.5 GiB. This may grow
+// in the future if needed, but it needs a little more work than just
+// changing this check.
+#define IS_ENC_DICT_SIZE_VALID(size) \
+ ((size) >= LZMA_DICT_SIZE_MIN \
+ && (size) <= (UINT32_C(1) << 30) + (UINT32_C(1) << 29))
+
+
/// A table of these is used by the LZ-based encoder to hold
/// the length-distance pairs found by the match finder.
typedef struct {
@@ -153,9 +160,13 @@ typedef struct {
/// Maximum search depth
uint32_t depth;
- /// TODO: Comment
+ /// Initial dictionary for the match finder to search.
const uint8_t *preset_dict;
+ /// If the preset dictionary is NULL, this value is ignored.
+ /// Otherwise this member must indicate the preset dictionary's
+ /// buffer size. If this size is larger than dict_size, then only
+ /// the dict_size sized tail of the preset_dict will be used.
uint32_t preset_dict_size;
} lzma_lz_options;
@@ -217,7 +228,7 @@ typedef struct {
// 3. The literals and matches are encoded using e.g. LZMA.
//
// The bytes that have been ran through the match finder, but not encoded yet,
-// are called `read ahead'.
+// are called 'read ahead'.
/// Get how many bytes the match finder hashes in its initial step.
diff --git a/src/liblzma/lz/lz_encoder_hash.h b/src/liblzma/lz/lz_encoder_hash.h
index 4d9971a..8ace82b 100644
--- a/src/liblzma/lz/lz_encoder_hash.h
+++ b/src/liblzma/lz/lz_encoder_hash.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_encoder_hash.h
@@ -5,9 +7,6 @@
//
// Author: Igor Pavlov
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZ_ENCODER_HASH_H
diff --git a/src/liblzma/lz/lz_encoder_hash_table.h b/src/liblzma/lz/lz_encoder_hash_table.h
index 8c51717..2b3a60e 100644
--- a/src/liblzma/lz/lz_encoder_hash_table.h
+++ b/src/liblzma/lz/lz_encoder_hash_table.h
@@ -1,4 +1,6 @@
-/* This file has been automatically generated by crc32_tablegen.c. */
+// SPDX-License-Identifier: 0BSD
+
+// This file has been generated by crc32_tablegen.c.
const uint32_t lzma_lz_hash_table[256] = {
0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
diff --git a/src/liblzma/lz/lz_encoder_mf.c b/src/liblzma/lz/lz_encoder_mf.c
index 1fdc2d7..557c261 100644
--- a/src/liblzma/lz/lz_encoder_mf.c
+++ b/src/liblzma/lz/lz_encoder_mf.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_encoder_mf.c
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#include "lz_encoder.h"