summaryrefslogtreecommitdiffstats
path: root/src/liblzma/lz
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-15 09:41:34 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-15 09:41:34 +0000
commitcf178685aca107aa37c748de11da01562e78c46c (patch)
tree84d60b39c1744edcbdd4dbfc5026583914432dba /src/liblzma/lz
parentAdding upstream version 5.6.1+really5.4.5. (diff)
downloadxz-utils-upstream.tar.xz
xz-utils-upstream.zip
Adding upstream version 5.6.2.upstream/5.6.2upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/liblzma/lz/Makefile.inc6
-rw-r--r--src/liblzma/lz/lz_decoder.c48
-rw-r--r--src/liblzma/lz/lz_decoder.h116
-rw-r--r--src/liblzma/lz/lz_encoder.c13
-rw-r--r--src/liblzma/lz/lz_encoder.h21
-rw-r--r--src/liblzma/lz/lz_encoder_hash.h5
-rw-r--r--src/liblzma/lz/lz_encoder_hash_table.h4
-rw-r--r--src/liblzma/lz/lz_encoder_mf.c5
-rw-r--r--src/liblzma/lzma/Makefile.inc6
-rw-r--r--src/liblzma/lzma/fastpos.h5
-rw-r--r--src/liblzma/lzma/fastpos_table.c4
-rw-r--r--src/liblzma/lzma/fastpos_tablegen.c17
-rw-r--r--src/liblzma/lzma/lzma2_decoder.c5
-rw-r--r--src/liblzma/lzma/lzma2_decoder.h5
-rw-r--r--src/liblzma/lzma/lzma2_encoder.c8
-rw-r--r--src/liblzma/lzma/lzma2_encoder.h5
-rw-r--r--src/liblzma/lzma/lzma_common.h41
-rw-r--r--src/liblzma/lzma/lzma_decoder.c760
-rw-r--r--src/liblzma/lzma/lzma_decoder.h5
-rw-r--r--src/liblzma/lzma/lzma_encoder.c18
-rw-r--r--src/liblzma/lzma/lzma_encoder.h5
-rw-r--r--src/liblzma/lzma/lzma_encoder_optimum_fast.c5
-rw-r--r--src/liblzma/lzma/lzma_encoder_optimum_normal.c7
-rw-r--r--src/liblzma/lzma/lzma_encoder_presets.c5
-rw-r--r--src/liblzma/lzma/lzma_encoder_private.h9
25 files changed, 654 insertions, 474 deletions
diff --git a/src/liblzma/lz/Makefile.inc b/src/liblzma/lz/Makefile.inc
index 75742a8..15235d7 100644
--- a/src/liblzma/lz/Makefile.inc
+++ b/src/liblzma/lz/Makefile.inc
@@ -1,9 +1,5 @@
-##
+## SPDX-License-Identifier: 0BSD
## Author: Lasse Collin
-##
-## This file has been put into the public domain.
-## You can do whatever you want with this file.
-##
if COND_ENCODER_LZ
liblzma_la_SOURCES += \
diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c
index 06c95c1..92913f2 100644
--- a/src/liblzma/lz/lz_decoder.c
+++ b/src/liblzma/lz/lz_decoder.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_decoder.c
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
// liblzma supports multiple LZ77-based filters. The LZ part is shared
@@ -54,9 +53,10 @@ typedef struct {
static void
lz_decoder_reset(lzma_coder *coder)
{
- coder->dict.pos = 0;
+ coder->dict.pos = 2 * LZ_DICT_REPEAT_MAX;
coder->dict.full = 0;
- coder->dict.buf[coder->dict.size - 1] = '\0';
+ coder->dict.buf[2 * LZ_DICT_REPEAT_MAX - 1] = '\0';
+ coder->dict.has_wrapped = false;
coder->dict.need_reset = false;
return;
}
@@ -70,8 +70,15 @@ decode_buffer(lzma_coder *coder,
{
while (true) {
// Wrap the dictionary if needed.
- if (coder->dict.pos == coder->dict.size)
- coder->dict.pos = 0;
+ if (coder->dict.pos == coder->dict.size) {
+ // See the comment of #define LZ_DICT_REPEAT_MAX.
+ coder->dict.pos = LZ_DICT_REPEAT_MAX;
+ coder->dict.has_wrapped = true;
+ memcpy(coder->dict.buf, coder->dict.buf
+ + coder->dict.size
+ - LZ_DICT_REPEAT_MAX,
+ LZ_DICT_REPEAT_MAX);
+ }
// Store the current dictionary position. It is needed to know
// where to start copying to the out[] buffer.
@@ -253,21 +260,31 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
// dictionary to the output buffer, since applications are
// recommended to give aligned buffers to liblzma.
//
+ // Reserve 2 * LZ_DICT_REPEAT_MAX bytes of extra space which is
+ // needed for alloc_size.
+ //
// Avoid integer overflow.
- if (lz_options.dict_size > SIZE_MAX - 15)
+ if (lz_options.dict_size > SIZE_MAX - 15 - 2 * LZ_DICT_REPEAT_MAX)
return LZMA_MEM_ERROR;
lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15));
+ // Reserve extra space as explained in the comment
+ // of #define LZ_DICT_REPEAT_MAX.
+ const size_t alloc_size
+ = lz_options.dict_size + 2 * LZ_DICT_REPEAT_MAX;
+
// Allocate and initialize the dictionary.
- if (coder->dict.size != lz_options.dict_size) {
+ if (coder->dict.size != alloc_size) {
lzma_free(coder->dict.buf, allocator);
- coder->dict.buf
- = lzma_alloc(lz_options.dict_size, allocator);
+ coder->dict.buf = lzma_alloc(alloc_size, allocator);
if (coder->dict.buf == NULL)
return LZMA_MEM_ERROR;
- coder->dict.size = lz_options.dict_size;
+ // NOTE: Yes, alloc_size, not lz_options.dict_size. The way
+ // coder->dict.full is updated will take care that we will
+ // still reject distances larger than lz_options.dict_size.
+ coder->dict.size = alloc_size;
}
lz_decoder_reset(next->coder);
@@ -280,9 +297,12 @@ lzma_lz_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
const size_t copy_size = my_min(lz_options.preset_dict_size,
lz_options.dict_size);
const size_t offset = lz_options.preset_dict_size - copy_size;
- memcpy(coder->dict.buf, lz_options.preset_dict + offset,
+ memcpy(coder->dict.buf + coder->dict.pos,
+ lz_options.preset_dict + offset,
copy_size);
- coder->dict.pos = copy_size;
+
+ // dict.pos isn't zero after lz_decoder_reset().
+ coder->dict.pos += copy_size;
coder->dict.full = copy_size;
}
diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h
index ad80d4d..cb61b6e 100644
--- a/src/liblzma/lz/lz_decoder.h
+++ b/src/liblzma/lz/lz_decoder.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_decoder.h
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZ_DECODER_H
@@ -17,10 +16,28 @@
#include "common.h"
+/// Maximum length of a match rounded up to a nice power of 2 which is
+/// a good size for aligned memcpy(). The allocated dictionary buffer will
+/// be 2 * LZ_DICT_REPEAT_MAX bytes larger than the actual dictionary size:
+///
+/// (1) Every time the decoder reaches the end of the dictionary buffer,
+/// the last LZ_DICT_REPEAT_MAX bytes will be copied to the beginning.
+/// This way dict_repeat() will only need to copy from one place,
+/// never from both the end and beginning of the buffer.
+///
+/// (2) The other LZ_DICT_REPEAT_MAX bytes are kept as a buffer between
+/// the oldest byte still in the dictionary and the current write
+/// position. This way dict_repeat(dict, dict->size - 1, &len)
+/// won't need memmove() as the copying cannot overlap.
+///
+/// Note that memcpy() still cannot be used if distance < len.
+///
+/// LZMA's longest match length is 273 so pick a multiple of 16 above that.
+#define LZ_DICT_REPEAT_MAX 288
+
+
typedef struct {
- /// Pointer to the dictionary buffer. It can be an allocated buffer
- /// internal to liblzma, or it can a be a buffer given by the
- /// application when in single-call mode (not implemented yet).
+ /// Pointer to the dictionary buffer.
uint8_t *buf;
/// Write position in dictionary. The next byte will be written to
@@ -35,9 +52,16 @@ typedef struct {
/// Write limit
size_t limit;
- /// Size of the dictionary
+ /// Allocated size of buf. This is 2 * LZ_DICT_REPEAT_MAX bytes
+ /// larger than the actual dictionary size. This is enforced by
+ /// how the value for "full" is set; it can be at most
+ /// "size - 2 * LZ_DICT_REPEAT_MAX".
size_t size;
+ /// True once the dictionary has become full and the writing position
+ /// has been wrapped in decode_buffer() in lz_decoder.c.
+ bool has_wrapped;
+
/// True when dictionary should be reset before decoding more data.
bool need_reset;
@@ -103,7 +127,16 @@ static inline uint8_t
dict_get(const lzma_dict *const dict, const uint32_t distance)
{
return dict->buf[dict->pos - distance - 1
- + (distance < dict->pos ? 0 : dict->size)];
+ + (distance < dict->pos
+ ? 0 : dict->size - LZ_DICT_REPEAT_MAX)];
+}
+
+
+/// Optimized version of dict_get(dict, 0)
+static inline uint8_t
+dict_get0(const lzma_dict *const dict)
+{
+ return dict->buf[dict->pos - 1];
}
@@ -132,68 +165,51 @@ dict_repeat(lzma_dict *dict, uint32_t distance, uint32_t *len)
uint32_t left = my_min(dict_avail, *len);
*len -= left;
+ size_t back = dict->pos - distance - 1;
+ if (distance >= dict->pos)
+ back += dict->size - LZ_DICT_REPEAT_MAX;
+
// Repeat a block of data from the history. Because memcpy() is faster
// than copying byte by byte in a loop, the copying process gets split
- // into three cases.
+ // into two cases.
if (distance < left) {
// Source and target areas overlap, thus we can't use
// memcpy() nor even memmove() safely.
do {
- dict->buf[dict->pos] = dict_get(dict, distance);
- ++dict->pos;
+ dict->buf[dict->pos++] = dict->buf[back++];
} while (--left > 0);
-
- } else if (distance < dict->pos) {
- // The easiest and fastest case
- memcpy(dict->buf + dict->pos,
- dict->buf + dict->pos - distance - 1,
- left);
- dict->pos += left;
-
} else {
- // The bigger the dictionary, the more rare this
- // case occurs. We need to "wrap" the dict, thus
- // we might need two memcpy() to copy all the data.
- assert(dict->full == dict->size);
- const uint32_t copy_pos
- = dict->pos - distance - 1 + dict->size;
- uint32_t copy_size = dict->size - copy_pos;
-
- if (copy_size < left) {
- memmove(dict->buf + dict->pos, dict->buf + copy_pos,
- copy_size);
- dict->pos += copy_size;
- copy_size = left - copy_size;
- memcpy(dict->buf + dict->pos, dict->buf, copy_size);
- dict->pos += copy_size;
- } else {
- memmove(dict->buf + dict->pos, dict->buf + copy_pos,
- left);
- dict->pos += left;
- }
+ memcpy(dict->buf + dict->pos, dict->buf + back, left);
+ dict->pos += left;
}
// Update how full the dictionary is.
- if (dict->full < dict->pos)
- dict->full = dict->pos;
+ if (!dict->has_wrapped)
+ dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
- return unlikely(*len != 0);
+ return *len != 0;
+}
+
+
+static inline void
+dict_put(lzma_dict *dict, uint8_t byte)
+{
+ dict->buf[dict->pos++] = byte;
+
+ if (!dict->has_wrapped)
+ dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
}
/// Puts one byte into the dictionary. Returns true if the dictionary was
/// already full and the byte couldn't be added.
static inline bool
-dict_put(lzma_dict *dict, uint8_t byte)
+dict_put_safe(lzma_dict *dict, uint8_t byte)
{
if (unlikely(dict->pos == dict->limit))
return true;
- dict->buf[dict->pos++] = byte;
-
- if (dict->pos > dict->full)
- dict->full = dict->pos;
-
+ dict_put(dict, byte);
return false;
}
@@ -217,8 +233,8 @@ dict_write(lzma_dict *restrict dict, const uint8_t *restrict in,
*left -= lzma_bufcpy(in, in_pos, in_size,
dict->buf, &dict->pos, dict->limit);
- if (dict->pos > dict->full)
- dict->full = dict->pos;
+ if (!dict->has_wrapped)
+ dict->full = dict->pos - 2 * LZ_DICT_REPEAT_MAX;
return;
}
diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c
index 5489085..4af23e1 100644
--- a/src/liblzma/lz/lz_encoder.c
+++ b/src/liblzma/lz/lz_encoder.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_encoder.c
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#include "lz_encoder.h"
@@ -196,9 +195,7 @@ lz_encoder_prepare(lzma_mf *mf, const lzma_allocator *allocator,
// For now, the dictionary size is limited to 1.5 GiB. This may grow
// in the future if needed, but it needs a little more work than just
// changing this check.
- if (lz_options->dict_size < LZMA_DICT_SIZE_MIN
- || lz_options->dict_size
- > (UINT32_C(1) << 30) + (UINT32_C(1) << 29)
+ if (!IS_ENC_DICT_SIZE_VALID(lz_options->dict_size)
|| lz_options->nice_len > lz_options->match_len_max)
return true;
@@ -549,7 +546,7 @@ lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
lzma_lz_options *lz_options))
{
#if defined(HAVE_SMALL) && !defined(HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR)
- // We need that the CRC32 table has been initialized.
+ // The CRC32 table must be initialized.
lzma_crc32_init();
#endif
@@ -569,6 +566,8 @@ lzma_lz_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
coder->lz.coder = NULL;
coder->lz.code = NULL;
coder->lz.end = NULL;
+ coder->lz.options_update = NULL;
+ coder->lz.set_out_limit = NULL;
// mf.size is initialized to silence Valgrind
// when used on optimized binaries (GCC may reorder
diff --git a/src/liblzma/lz/lz_encoder.h b/src/liblzma/lz/lz_encoder.h
index ffcba02..eb197c6 100644
--- a/src/liblzma/lz/lz_encoder.h
+++ b/src/liblzma/lz/lz_encoder.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_encoder.h
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZ_ENCODER_H
@@ -17,6 +16,14 @@
#include "common.h"
+// For now, the dictionary size is limited to 1.5 GiB. This may grow
+// in the future if needed, but it needs a little more work than just
+// changing this check.
+#define IS_ENC_DICT_SIZE_VALID(size) \
+ ((size) >= LZMA_DICT_SIZE_MIN \
+ && (size) <= (UINT32_C(1) << 30) + (UINT32_C(1) << 29))
+
+
/// A table of these is used by the LZ-based encoder to hold
/// the length-distance pairs found by the match finder.
typedef struct {
@@ -153,9 +160,13 @@ typedef struct {
/// Maximum search depth
uint32_t depth;
- /// TODO: Comment
+ /// Initial dictionary for the match finder to search.
const uint8_t *preset_dict;
+ /// If the preset dictionary is NULL, this value is ignored.
+ /// Otherwise this member must indicate the preset dictionary's
+ /// buffer size. If this size is larger than dict_size, then only
+ /// the dict_size sized tail of the preset_dict will be used.
uint32_t preset_dict_size;
} lzma_lz_options;
@@ -217,7 +228,7 @@ typedef struct {
// 3. The literals and matches are encoded using e.g. LZMA.
//
// The bytes that have been ran through the match finder, but not encoded yet,
-// are called `read ahead'.
+// are called 'read ahead'.
/// Get how many bytes the match finder hashes in its initial step.
diff --git a/src/liblzma/lz/lz_encoder_hash.h b/src/liblzma/lz/lz_encoder_hash.h
index 4d9971a..8ace82b 100644
--- a/src/liblzma/lz/lz_encoder_hash.h
+++ b/src/liblzma/lz/lz_encoder_hash.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_encoder_hash.h
@@ -5,9 +7,6 @@
//
// Author: Igor Pavlov
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZ_ENCODER_HASH_H
diff --git a/src/liblzma/lz/lz_encoder_hash_table.h b/src/liblzma/lz/lz_encoder_hash_table.h
index 8c51717..2b3a60e 100644
--- a/src/liblzma/lz/lz_encoder_hash_table.h
+++ b/src/liblzma/lz/lz_encoder_hash_table.h
@@ -1,4 +1,6 @@
-/* This file has been automatically generated by crc32_tablegen.c. */
+// SPDX-License-Identifier: 0BSD
+
+// This file has been generated by crc32_tablegen.c.
const uint32_t lzma_lz_hash_table[256] = {
0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA,
diff --git a/src/liblzma/lz/lz_encoder_mf.c b/src/liblzma/lz/lz_encoder_mf.c
index 1fdc2d7..557c261 100644
--- a/src/liblzma/lz/lz_encoder_mf.c
+++ b/src/liblzma/lz/lz_encoder_mf.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lz_encoder_mf.c
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#include "lz_encoder.h"
diff --git a/src/liblzma/lzma/Makefile.inc b/src/liblzma/lzma/Makefile.inc
index 25440d8..dca6b76 100644
--- a/src/liblzma/lzma/Makefile.inc
+++ b/src/liblzma/lzma/Makefile.inc
@@ -1,9 +1,5 @@
-##
+## SPDX-License-Identifier: 0BSD
## Author: Lasse Collin
-##
-## This file has been put into the public domain.
-## You can do whatever you want with this file.
-##
EXTRA_DIST += lzma/fastpos_tablegen.c
diff --git a/src/liblzma/lzma/fastpos.h b/src/liblzma/lzma/fastpos.h
index dbeb16f..d3969a7 100644
--- a/src/liblzma/lzma/fastpos.h
+++ b/src/liblzma/lzma/fastpos.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file fastpos.h
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_FASTPOS_H
diff --git a/src/liblzma/lzma/fastpos_table.c b/src/liblzma/lzma/fastpos_table.c
index 6a3ceac..4e10e37 100644
--- a/src/liblzma/lzma/fastpos_table.c
+++ b/src/liblzma/lzma/fastpos_table.c
@@ -1,4 +1,6 @@
-/* This file has been automatically generated by fastpos_tablegen.c. */
+// SPDX-License-Identifier: 0BSD
+
+// This file has been generated by fastpos_tablegen.c.
#include "common.h"
#include "fastpos.h"
diff --git a/src/liblzma/lzma/fastpos_tablegen.c b/src/liblzma/lzma/fastpos_tablegen.c
index 57ed150..957ccb7 100644
--- a/src/liblzma/lzma/fastpos_tablegen.c
+++ b/src/liblzma/lzma/fastpos_tablegen.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file fastpos_tablegen.c
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#include <inttypes.h>
@@ -35,11 +34,13 @@ main(void)
fastpos[c] = slot_fast;
}
- printf("/* This file has been automatically generated "
- "by fastpos_tablegen.c. */\n\n"
- "#include \"common.h\"\n"
- "#include \"fastpos.h\"\n\n"
- "const uint8_t lzma_fastpos[1 << FASTPOS_BITS] = {");
+ // Split the SPDX string so that it won't accidentally match
+ // when tools search for the string.
+ printf("// SPDX" "-License-Identifier" ": 0BSD\n\n"
+ "// This file has been generated by fastpos_tablegen.c.\n\n"
+ "#include \"common.h\"\n"
+ "#include \"fastpos.h\"\n\n"
+ "const uint8_t lzma_fastpos[1 << FASTPOS_BITS] = {");
for (size_t i = 0; i < (1 << FASTPOS_BITS); ++i) {
if (i % 16 == 0)
diff --git a/src/liblzma/lzma/lzma2_decoder.c b/src/liblzma/lzma/lzma2_decoder.c
index 567df49..37ab253 100644
--- a/src/liblzma/lzma/lzma2_decoder.c
+++ b/src/liblzma/lzma/lzma2_decoder.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma2_decoder.c
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#include "lzma2_decoder.h"
diff --git a/src/liblzma/lzma/lzma2_decoder.h b/src/liblzma/lzma/lzma2_decoder.h
index ef2dcbf..cdd8b46 100644
--- a/src/liblzma/lzma/lzma2_decoder.h
+++ b/src/liblzma/lzma/lzma2_decoder.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma2_decoder.h
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZMA2_DECODER_H
diff --git a/src/liblzma/lzma/lzma2_encoder.c b/src/liblzma/lzma/lzma2_encoder.c
index 4b6b231..e20b75b 100644
--- a/src/liblzma/lzma/lzma2_encoder.c
+++ b/src/liblzma/lzma/lzma2_encoder.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma2_encoder.c
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#include "lz_encoder.h"
@@ -409,6 +408,9 @@ lzma_lzma2_block_size(const void *options)
{
const lzma_options_lzma *const opt = options;
+ if (!IS_ENC_DICT_SIZE_VALID(opt->dict_size))
+ return UINT64_MAX;
+
// Use at least 1 MiB to keep compression ratio better.
return my_max((uint64_t)(opt->dict_size) * 3, UINT64_C(1) << 20);
}
diff --git a/src/liblzma/lzma/lzma2_encoder.h b/src/liblzma/lzma/lzma2_encoder.h
index 515f183..29966a6 100644
--- a/src/liblzma/lzma/lzma2_encoder.h
+++ b/src/liblzma/lzma/lzma2_encoder.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma2_encoder.h
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZMA2_ENCODER_H
diff --git a/src/liblzma/lzma/lzma_common.h b/src/liblzma/lzma/lzma_common.h
index 9d040d9..c3c587f 100644
--- a/src/liblzma/lzma/lzma_common.h
+++ b/src/liblzma/lzma/lzma_common.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma_common.h
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZMA_COMMON_H
@@ -84,6 +83,20 @@ typedef enum {
? (state) - 3 \
: (state) - 6))
+/// Like update_literal(state) but when it is already known that
+/// is_literal_state(state) is true.
+#define update_literal_normal(state) \
+ state = ((state) <= STATE_SHORTREP_LIT_LIT \
+ ? STATE_LIT_LIT \
+ : (state) - 3);
+
+/// Like update_literal(state) but when it is already known that
+/// is_literal_state(state) is false.
+#define update_literal_matched(state) \
+ state = ((state) <= STATE_LIT_SHORTREP \
+ ? (state) - 3 \
+ : (state) - 6);
+
/// Indicate that the latest state was a match.
#define update_match(state) \
state = ((state) < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH)
@@ -112,31 +125,33 @@ typedef enum {
///
/// Match byte is used when the previous LZMA symbol was something else than
/// a literal (that is, it was some kind of match).
-#define LITERAL_CODER_SIZE 0x300
+#define LITERAL_CODER_SIZE UINT32_C(0x300)
/// Maximum number of literal coders
#define LITERAL_CODERS_MAX (1 << LZMA_LCLP_MAX)
+/// Calculates the literal_mask that literal_subcoder() needs.
+#define literal_mask_calc(lc, lp) \
+ ((UINT32_C(0x100) << (lp)) - (UINT32_C(0x100) >> (lc)))
+
/// Locate the literal coder for the next literal byte. The choice depends on
/// - the lowest literal_pos_bits bits of the position of the current
/// byte; and
/// - the highest literal_context_bits bits of the previous byte.
-#define literal_subcoder(probs, lc, lp_mask, pos, prev_byte) \
- ((probs)[(((pos) & (lp_mask)) << (lc)) \
- + ((uint32_t)(prev_byte) >> (8U - (lc)))])
+#define literal_subcoder(probs, lc, literal_mask, pos, prev_byte) \
+ ((probs) + UINT32_C(3) * \
+ (((((pos) << 8) + (prev_byte)) & (literal_mask)) << (lc)))
static inline void
-literal_init(probability (*probs)[LITERAL_CODER_SIZE],
- uint32_t lc, uint32_t lp)
+literal_init(probability *probs, uint32_t lc, uint32_t lp)
{
assert(lc + lp <= LZMA_LCLP_MAX);
- const uint32_t coders = 1U << (lc + lp);
+ const size_t coders = LITERAL_CODER_SIZE << (lc + lp);
- for (uint32_t i = 0; i < coders; ++i)
- for (uint32_t j = 0; j < LITERAL_CODER_SIZE; ++j)
- bit_reset(probs[i][j]);
+ for (size_t i = 0; i < coders; ++i)
+ bit_reset(probs[i]);
return;
}
diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c
index 26c148a..0abed02 100644
--- a/src/liblzma/lzma/lzma_decoder.c
+++ b/src/liblzma/lzma/lzma_decoder.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma_decoder.c
@@ -5,9 +7,7 @@
///
// Authors: Igor Pavlov
// Lasse Collin
-//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
+// Jia Tan
//
///////////////////////////////////////////////////////////////////////////////
@@ -22,25 +22,20 @@
# pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#endif
+// Minimum number of input bytes to safely decode one LZMA symbol.
+// The worst case is that we decode 22 bits using probabilities and 26
+// direct bits. This may consume at most 20 bytes of input.
+#define LZMA_IN_REQUIRED 20
-#ifdef HAVE_SMALL
// Macros for (somewhat) size-optimized code.
-#define seq_4(seq) seq
-
-#define seq_6(seq) seq
-
-#define seq_8(seq) seq
-
-#define seq_len(seq) \
- seq ## _CHOICE, \
- seq ## _CHOICE2, \
- seq ## _BITTREE
-
+// This is used to decode the match length (how many bytes must be repeated
+// from the dictionary). This version is used in the Resumable mode and
+// does not unroll any loops.
#define len_decode(target, ld, pos_state, seq) \
do { \
case seq ## _CHOICE: \
- rc_if_0(ld.choice, seq ## _CHOICE) { \
+ rc_if_0_safe(ld.choice, seq ## _CHOICE) { \
rc_update_0(ld.choice); \
probs = ld.low[pos_state];\
limit = LEN_LOW_SYMBOLS; \
@@ -48,7 +43,7 @@ case seq ## _CHOICE: \
} else { \
rc_update_1(ld.choice); \
case seq ## _CHOICE2: \
- rc_if_0(ld.choice2, seq ## _CHOICE2) { \
+ rc_if_0_safe(ld.choice2, seq ## _CHOICE2) { \
rc_update_0(ld.choice2); \
probs = ld.mid[pos_state]; \
limit = LEN_MID_SYMBOLS; \
@@ -64,98 +59,39 @@ case seq ## _CHOICE2: \
symbol = 1; \
case seq ## _BITTREE: \
do { \
- rc_bit(probs[symbol], , , seq ## _BITTREE); \
+ rc_bit_safe(probs[symbol], , , seq ## _BITTREE); \
} while (symbol < limit); \
target += symbol - limit; \
} while (0)
-#else // HAVE_SMALL
-
-// Unrolled versions
-#define seq_4(seq) \
- seq ## 0, \
- seq ## 1, \
- seq ## 2, \
- seq ## 3
-
-#define seq_6(seq) \
- seq ## 0, \
- seq ## 1, \
- seq ## 2, \
- seq ## 3, \
- seq ## 4, \
- seq ## 5
-
-#define seq_8(seq) \
- seq ## 0, \
- seq ## 1, \
- seq ## 2, \
- seq ## 3, \
- seq ## 4, \
- seq ## 5, \
- seq ## 6, \
- seq ## 7
-
-#define seq_len(seq) \
- seq ## _CHOICE, \
- seq ## _LOW0, \
- seq ## _LOW1, \
- seq ## _LOW2, \
- seq ## _CHOICE2, \
- seq ## _MID0, \
- seq ## _MID1, \
- seq ## _MID2, \
- seq ## _HIGH0, \
- seq ## _HIGH1, \
- seq ## _HIGH2, \
- seq ## _HIGH3, \
- seq ## _HIGH4, \
- seq ## _HIGH5, \
- seq ## _HIGH6, \
- seq ## _HIGH7
-#define len_decode(target, ld, pos_state, seq) \
+// This is the faster version of the match length decoder that does not
+// worry about being resumable. It unrolls the bittree decoding loop.
+#define len_decode_fast(target, ld, pos_state) \
do { \
symbol = 1; \
-case seq ## _CHOICE: \
- rc_if_0(ld.choice, seq ## _CHOICE) { \
+ rc_if_0(ld.choice) { \
rc_update_0(ld.choice); \
- rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW0); \
- rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW1); \
- rc_bit_case(ld.low[pos_state][symbol], , , seq ## _LOW2); \
- target = symbol - LEN_LOW_SYMBOLS + MATCH_LEN_MIN; \
+ rc_bittree3(ld.low[pos_state], \
+ -LEN_LOW_SYMBOLS + MATCH_LEN_MIN); \
+ target = symbol; \
} else { \
rc_update_1(ld.choice); \
-case seq ## _CHOICE2: \
- rc_if_0(ld.choice2, seq ## _CHOICE2) { \
+ rc_if_0(ld.choice2) { \
rc_update_0(ld.choice2); \
- rc_bit_case(ld.mid[pos_state][symbol], , , \
- seq ## _MID0); \
- rc_bit_case(ld.mid[pos_state][symbol], , , \
- seq ## _MID1); \
- rc_bit_case(ld.mid[pos_state][symbol], , , \
- seq ## _MID2); \
- target = symbol - LEN_MID_SYMBOLS \
- + MATCH_LEN_MIN + LEN_LOW_SYMBOLS; \
+ rc_bittree3(ld.mid[pos_state], -LEN_MID_SYMBOLS \
+ + MATCH_LEN_MIN + LEN_LOW_SYMBOLS); \
+ target = symbol; \
} else { \
rc_update_1(ld.choice2); \
- rc_bit_case(ld.high[symbol], , , seq ## _HIGH0); \
- rc_bit_case(ld.high[symbol], , , seq ## _HIGH1); \
- rc_bit_case(ld.high[symbol], , , seq ## _HIGH2); \
- rc_bit_case(ld.high[symbol], , , seq ## _HIGH3); \
- rc_bit_case(ld.high[symbol], , , seq ## _HIGH4); \
- rc_bit_case(ld.high[symbol], , , seq ## _HIGH5); \
- rc_bit_case(ld.high[symbol], , , seq ## _HIGH6); \
- rc_bit_case(ld.high[symbol], , , seq ## _HIGH7); \
- target = symbol - LEN_HIGH_SYMBOLS \
+ rc_bittree8(ld.high, -LEN_HIGH_SYMBOLS \
+ MATCH_LEN_MIN \
- + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS; \
+ + LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS); \
+ target = symbol; \
} \
} \
} while (0)
-#endif // HAVE_SMALL
-
/// Length decoder probabilities; see comments in lzma_common.h.
typedef struct {
@@ -173,7 +109,7 @@ typedef struct {
///////////////////
/// Literals; see comments in lzma_common.h.
- probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
+ probability literal[LITERAL_CODERS_MAX * LITERAL_CODER_SIZE];
/// If 1, it's a match. Otherwise it's a single 8-bit literal.
probability is_match[STATES][POS_STATES_MAX];
@@ -232,7 +168,7 @@ typedef struct {
uint32_t pos_mask; // (1U << pb) - 1
uint32_t literal_context_bits;
- uint32_t literal_pos_mask;
+ uint32_t literal_mask;
/// Uncompressed size as bytes, or LZMA_VLI_UNKNOWN if end of
/// payload marker is expected.
@@ -251,22 +187,26 @@ typedef struct {
enum {
SEQ_NORMALIZE,
SEQ_IS_MATCH,
- seq_8(SEQ_LITERAL),
- seq_8(SEQ_LITERAL_MATCHED),
+ SEQ_LITERAL,
+ SEQ_LITERAL_MATCHED,
SEQ_LITERAL_WRITE,
SEQ_IS_REP,
- seq_len(SEQ_MATCH_LEN),
- seq_6(SEQ_DIST_SLOT),
+ SEQ_MATCH_LEN_CHOICE,
+ SEQ_MATCH_LEN_CHOICE2,
+ SEQ_MATCH_LEN_BITTREE,
+ SEQ_DIST_SLOT,
SEQ_DIST_MODEL,
SEQ_DIRECT,
- seq_4(SEQ_ALIGN),
+ SEQ_ALIGN,
SEQ_EOPM,
SEQ_IS_REP0,
SEQ_SHORTREP,
SEQ_IS_REP0_LONG,
SEQ_IS_REP1,
SEQ_IS_REP2,
- seq_len(SEQ_REP_LEN),
+ SEQ_REP_LEN_CHOICE,
+ SEQ_REP_LEN_CHOICE2,
+ SEQ_REP_LEN_BITTREE,
SEQ_COPY,
} sequence;
@@ -321,7 +261,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
const size_t dict_start = dict.pos;
// Range decoder
- rc_to_local(coder->rc, *in_pos);
+ rc_to_local(coder->rc, *in_pos, LZMA_IN_REQUIRED);
// State
uint32_t state = coder->state;
@@ -340,7 +280,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
uint32_t offset = coder->offset;
uint32_t len = coder->len;
- const uint32_t literal_pos_mask = coder->literal_pos_mask;
+ const uint32_t literal_mask = coder->literal_mask;
const uint32_t literal_context_bits = coder->literal_context_bits;
// Temporary variables
@@ -367,8 +307,24 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
might_finish_without_eopm = true;
}
- // The main decoder loop. The "switch" is used to restart the decoder at
- // correct location. Once restarted, the "switch" is no longer used.
+ // The main decoder loop. The "switch" is used to resume the decoder at
+ // correct location. Once resumed, the "switch" is no longer used.
+ // The decoder loop is split into two modes:
+ //
+ // 1 - Non-resumable mode (fast). This is used when it is guaranteed
+ // there is enough input to decode the next symbol. If the output
+ // limit is reached, then the decoder loop will save the place
+ // for the resumable mode to continue. This mode is not used if
+ // HAVE_SMALL is defined. This is faster than Resumable mode
+ // because it reduces the number of branches needed and allows
+ // for more compiler optimizations.
+ //
+ // 2 - Resumable mode (slow). This is used when a previous decoder
+ // loop did not have enough space in the input or output buffers
+ // to complete. It uses sequence enum values to record in
+ // coder->sequence where to resume in the decoder loop. This
+ // is the only mode used when HAVE_SMALL is defined.
+
switch (coder->sequence)
while (true) {
// Calculate new pos_state. This is skipped on the first loop
@@ -376,13 +332,339 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
// variables.
pos_state = dict.pos & pos_mask;
+#ifndef HAVE_SMALL
+
+ ///////////////////////////////
+ // Non-resumable Mode (fast) //
+ ///////////////////////////////
+
+ // Go to Resumable mode (1) if there is not enough input to
+ // safely decode any possible LZMA symbol or (2) if the
+ // dictionary is full, which may need special checks that
+ // are only done in the Resumable mode.
+ if (unlikely(!rc_is_fast_allowed()
+ || dict.pos == dict.limit))
+ goto slow;
+
+ // Decode the first bit from the next LZMA symbol.
+ // If the bit is a 0, then we handle it as a literal.
+ // If the bit is a 1, then it is a match of previously
+ // decoded data.
+ rc_if_0(coder->is_match[state][pos_state]) {
+ /////////////////////
+ // Decode literal. //
+ /////////////////////
+
+ // Update the RC that we have decoded a 0.
+ rc_update_0(coder->is_match[state][pos_state]);
+
+ // Get the correct probability array from lp and
+ // lc params.
+ probs = literal_subcoder(coder->literal,
+ literal_context_bits, literal_mask,
+ dict.pos, dict_get0(&dict));
+
+ if (is_literal_state(state)) {
+ update_literal_normal(state);
+
+ // Decode literal without match byte.
+ rc_bittree8(probs, 0);
+ } else {
+ update_literal_matched(state);
+
+ // Decode literal with match byte.
+ rc_matched_literal(probs,
+ dict_get(&dict, rep0));
+ }
+
+ // Write decoded literal to dictionary
+ dict_put(&dict, symbol);
+ continue;
+ }
+
+ ///////////////////
+ // Decode match. //
+ ///////////////////
+
+ // Instead of a new byte we are going to decode a
+ // distance-length pair. The distance represents how far
+ // back in the dictionary to begin copying. The length
+ // represents how many bytes to copy.
+
+ rc_update_1(coder->is_match[state][pos_state]);
+
+ rc_if_0(coder->is_rep[state]) {
+ ///////////////////
+ // Simple match. //
+ ///////////////////
+
+ // Not a repeated match. In this case,
+ // the length (how many bytes to copy) must be
+ // decoded first. Then, the distance (where to
+ // start copying) is decoded.
+ //
+ // This is also how we know when we are done
+ // decoding. If the distance decodes to UINT32_MAX,
+ // then we know to stop decoding (end of payload
+ // marker).
+
+ rc_update_0(coder->is_rep[state]);
+ update_match(state);
+
+ // The latest three match distances are kept in
+ // memory in case there are repeated matches.
+ rep3 = rep2;
+ rep2 = rep1;
+ rep1 = rep0;
+
+ // Decode the length of the match.
+ len_decode_fast(len, coder->match_len_decoder,
+ pos_state);
+
+ // Next, decode the distance into rep0.
+
+ // The next 6 bits determine how to decode the
+ // rest of the distance.
+ probs = coder->dist_slot[get_dist_state(len)];
+
+ rc_bittree6(probs, -DIST_SLOTS);
+ assert(symbol <= 63);
+
+ if (symbol < DIST_MODEL_START) {
+ // If the decoded symbol is < DIST_MODEL_START
+ // then we use its value directly as the
+ // match distance. No other bits are needed.
+ // The only possible distance values
+ // are [0, 3].
+ rep0 = symbol;
+ } else {
+ // Use the first two bits of symbol as the
+ // highest bits of the match distance.
+
+ // "limit" represents the number of low bits
+ // to decode.
+ limit = (symbol >> 1) - 1;
+ assert(limit >= 1 && limit <= 30);
+ rep0 = 2 + (symbol & 1);
+
+ if (symbol < DIST_MODEL_END) {
+ // When symbol is >= DIST_MODEL_START,
+ // but symbol < DIST_MODEL_END, then
+ // it can decode distances between
+ // [4, 127].
+ assert(limit <= 5);
+ rep0 <<= limit;
+ assert(rep0 <= 96);
+
+ // -1 is fine, because we start
+ // decoding at probs[1], not probs[0].
+ // NOTE: This violates the C standard,
+ // since we are doing pointer
+ // arithmetic past the beginning of
+ // the array.
+ assert((int32_t)(rep0 - symbol - 1)
+ >= -1);
+ assert((int32_t)(rep0 - symbol - 1)
+ <= 82);
+ probs = coder->pos_special + rep0
+ - symbol - 1;
+ symbol = 1;
+ offset = 1;
+
+ // Variable number (1-5) of bits
+ // from a reverse bittree. This
+ // isn't worth manual unrolling.
+ //
+ // NOTE: Making one or many of the
+ // variables (probs, symbol, offset,
+ // or limit) local here (instead of
+ // using those declared outside the
+ // main loop) can affect code size
+ // and performance which isn't a
+ // surprise but it's not so clear
+ // what is the best.
+ do {
+ rc_bit_add_if_1(probs,
+ rep0, offset);
+ offset <<= 1;
+ } while (--limit > 0);
+ } else {
+ // The distance is >= 128. Decode the
+ // lower bits without probabilities
+ // except the lowest four bits.
+ assert(symbol >= 14);
+ assert(limit >= 6);
+
+ limit -= ALIGN_BITS;
+ assert(limit >= 2);
+
+ rc_direct(rep0, limit);
+
+ // Decode the lowest four bits using
+ // probabilities.
+ rep0 <<= ALIGN_BITS;
+ rc_bittree_rev4(coder->pos_align);
+ rep0 += symbol;
+
+ // If the end of payload marker (EOPM)
+ // is detected, jump to the safe code.
+ // The EOPM handling isn't speed
+ // critical at all.
+ //
+ // A final normalization is needed
+ // after the EOPM (there can be a
+ // dummy byte to read in some cases).
+ // If the normalization was done here
+ // in the fast code, it would need to
+ // be taken into account in the value
+ // of LZMA_IN_REQUIRED. Using the
+ // safe code allows keeping
+ // LZMA_IN_REQUIRED as 20 instead of
+ // 21.
+ if (rep0 == UINT32_MAX)
+ goto eopm;
+ }
+ }
+
+ // Validate the distance we just decoded.
+ if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
+ ret = LZMA_DATA_ERROR;
+ goto out;
+ }
+
+ } else {
+ rc_update_1(coder->is_rep[state]);
+
+ /////////////////////
+ // Repeated match. //
+ /////////////////////
+
+ // The match distance is a value that we have decoded
+ // recently. The latest four match distances are
+ // available as rep0, rep1, rep2 and rep3. We will
+ // now decode which of them is the new distance.
+ //
+ // There cannot be a match if we haven't produced
+ // any output, so check that first.
+ if (unlikely(!dict_is_distance_valid(&dict, 0))) {
+ ret = LZMA_DATA_ERROR;
+ goto out;
+ }
+
+ rc_if_0(coder->is_rep0[state]) {
+ rc_update_0(coder->is_rep0[state]);
+ // The distance is rep0.
+
+ // Decode the next bit to determine if 1 byte
+ // should be copied from rep0 distance or
+ // if the number of bytes needs to be decoded.
+
+ // If the next bit is 0, then it is a
+ // "Short Rep Match" and only 1 byte is copied.
+ // Otherwise, the length of the match is
+ // decoded after the "else" statement.
+ rc_if_0(coder->is_rep0_long[state][pos_state]) {
+ rc_update_0(coder->is_rep0_long[
+ state][pos_state]);
+
+ update_short_rep(state);
+ dict_put(&dict, dict_get(&dict, rep0));
+ continue;
+ }
+
+ // Repeating more than one byte at
+ // distance of rep0.
+ rc_update_1(coder->is_rep0_long[
+ state][pos_state]);
+
+ } else {
+ rc_update_1(coder->is_rep0[state]);
+
+ // The distance is rep1, rep2 or rep3. Once
+ // we find out which one of these three, it
+ // is stored to rep0 and rep1, rep2 and rep3
+ // are updated accordingly. There is no
+ // "Short Rep Match" option, so the length
+ // of the match must always be decoded next.
+ rc_if_0(coder->is_rep1[state]) {
+ // The distance is rep1.
+ rc_update_0(coder->is_rep1[state]);
+
+ const uint32_t distance = rep1;
+ rep1 = rep0;
+ rep0 = distance;
+
+ } else {
+ rc_update_1(coder->is_rep1[state]);
+
+ rc_if_0(coder->is_rep2[state]) {
+ // The distance is rep2.
+ rc_update_0(coder->is_rep2[
+ state]);
+
+ const uint32_t distance = rep2;
+ rep2 = rep1;
+ rep1 = rep0;
+ rep0 = distance;
+
+ } else {
+ // The distance is rep3.
+ rc_update_1(coder->is_rep2[
+ state]);
+
+ const uint32_t distance = rep3;
+ rep3 = rep2;
+ rep2 = rep1;
+ rep1 = rep0;
+ rep0 = distance;
+ }
+ }
+ }
+
+ update_long_rep(state);
+
+ // Decode the length of the repeated match.
+ len_decode_fast(len, coder->rep_len_decoder,
+ pos_state);
+ }
+
+ /////////////////////////////////
+ // Repeat from history buffer. //
+ /////////////////////////////////
+
+ // The length is always between these limits. There is no way
+ // to trigger the algorithm to set len outside this range.
+ assert(len >= MATCH_LEN_MIN);
+ assert(len <= MATCH_LEN_MAX);
+
+ // Repeat len bytes from distance of rep0.
+ if (unlikely(dict_repeat(&dict, rep0, &len))) {
+ coder->sequence = SEQ_COPY;
+ goto out;
+ }
+
+ continue;
+
+slow:
+#endif
+ ///////////////////////////
+ // Resumable Mode (slow) //
+ ///////////////////////////
+
+ // This is very similar to Non-resumable Mode, so most of the
+ // comments are not repeated. The main differences are:
+ // - case labels are used to resume at the correct location.
+ // - Loops are not unrolled.
+ // - Range coder macros take an extra sequence argument
+ // so they can save to coder->sequence the location to
+ // resume in case there is not enough input.
case SEQ_NORMALIZE:
case SEQ_IS_MATCH:
if (unlikely(might_finish_without_eopm
&& dict.pos == dict.limit)) {
// In rare cases there is a useless byte that needs
// to be read anyway.
- rc_normalize(SEQ_NORMALIZE);
+ rc_normalize_safe(SEQ_NORMALIZE);
// If the range decoder state is such that we can
// be at the end of the LZMA stream, then the
@@ -405,49 +687,37 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
eopm_is_valid = true;
}
- rc_if_0(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
- rc_update_0(coder->is_match[state][pos_state]);
+ rc_if_0_safe(coder->is_match[state][pos_state], SEQ_IS_MATCH) {
+ /////////////////////
+ // Decode literal. //
+ /////////////////////
- // It's a literal i.e. a single 8-bit byte.
+ rc_update_0(coder->is_match[state][pos_state]);
probs = literal_subcoder(coder->literal,
- literal_context_bits, literal_pos_mask,
- dict.pos, dict_get(&dict, 0));
+ literal_context_bits, literal_mask,
+ dict.pos, dict_get0(&dict));
symbol = 1;
if (is_literal_state(state)) {
+ update_literal_normal(state);
+
// Decode literal without match byte.
-#ifdef HAVE_SMALL
+ // The "slow" version does not unroll
+ // the loop.
case SEQ_LITERAL:
do {
- rc_bit(probs[symbol], , , SEQ_LITERAL);
+ rc_bit_safe(probs[symbol], , ,
+ SEQ_LITERAL);
} while (symbol < (1 << 8));
-#else
- rc_bit_case(probs[symbol], , , SEQ_LITERAL0);
- rc_bit_case(probs[symbol], , , SEQ_LITERAL1);
- rc_bit_case(probs[symbol], , , SEQ_LITERAL2);
- rc_bit_case(probs[symbol], , , SEQ_LITERAL3);
- rc_bit_case(probs[symbol], , , SEQ_LITERAL4);
- rc_bit_case(probs[symbol], , , SEQ_LITERAL5);
- rc_bit_case(probs[symbol], , , SEQ_LITERAL6);
- rc_bit_case(probs[symbol], , , SEQ_LITERAL7);
-#endif
} else {
+ update_literal_matched(state);
+
// Decode literal with match byte.
- //
- // We store the byte we compare against
- // ("match byte") to "len" to minimize the
- // number of variables we need to store
- // between decoder calls.
len = (uint32_t)(dict_get(&dict, rep0)) << 1;
- // The usage of "offset" allows omitting some
- // branches, which should give tiny speed
- // improvement on some CPUs. "offset" gets
- // set to zero if match_bit didn't match.
offset = 0x100;
-#ifdef HAVE_SMALL
case SEQ_LITERAL_MATCHED:
do {
const uint32_t match_bit
@@ -456,7 +726,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
= offset + match_bit
+ symbol;
- rc_bit(probs[subcoder_index],
+ rc_bit_safe(probs[subcoder_index],
offset &= ~match_bit,
offset &= match_bit,
SEQ_LITERAL_MATCHED);
@@ -469,61 +739,10 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
len <<= 1;
} while (symbol < (1 << 8));
-#else
- // Unroll the loop.
- uint32_t match_bit;
- uint32_t subcoder_index;
-
-# define d(seq) \
- case seq: \
- match_bit = len & offset; \
- subcoder_index = offset + match_bit + symbol; \
- rc_bit(probs[subcoder_index], \
- offset &= ~match_bit, \
- offset &= match_bit, \
- seq)
-
- d(SEQ_LITERAL_MATCHED0);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED1);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED2);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED3);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED4);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED5);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED6);
- len <<= 1;
- d(SEQ_LITERAL_MATCHED7);
-# undef d
-#endif
}
- //update_literal(state);
- // Use a lookup table to update to literal state,
- // since compared to other state updates, this would
- // need two branches.
- static const lzma_lzma_state next_state[] = {
- STATE_LIT_LIT,
- STATE_LIT_LIT,
- STATE_LIT_LIT,
- STATE_LIT_LIT,
- STATE_MATCH_LIT_LIT,
- STATE_REP_LIT_LIT,
- STATE_SHORTREP_LIT_LIT,
- STATE_MATCH_LIT,
- STATE_REP_LIT,
- STATE_SHORTREP_LIT,
- STATE_MATCH_LIT,
- STATE_REP_LIT
- };
- state = next_state[state];
-
case SEQ_LITERAL_WRITE:
- if (unlikely(dict_put(&dict, symbol))) {
+ if (dict_put_safe(&dict, symbol)) {
coder->sequence = SEQ_LITERAL_WRITE;
goto out;
}
@@ -531,64 +750,47 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
continue;
}
- // Instead of a new byte we are going to get a byte range
- // (distance and length) which will be repeated from our
- // output history.
+ ///////////////////
+ // Decode match. //
+ ///////////////////
rc_update_1(coder->is_match[state][pos_state]);
case SEQ_IS_REP:
- rc_if_0(coder->is_rep[state], SEQ_IS_REP) {
- // Not a repeated match
+ rc_if_0_safe(coder->is_rep[state], SEQ_IS_REP) {
+ ///////////////////
+ // Simple match. //
+ ///////////////////
+
rc_update_0(coder->is_rep[state]);
update_match(state);
- // The latest three match distances are kept in
- // memory in case there are repeated matches.
rep3 = rep2;
rep2 = rep1;
rep1 = rep0;
- // Decode the length of the match.
len_decode(len, coder->match_len_decoder,
pos_state, SEQ_MATCH_LEN);
- // Prepare to decode the highest two bits of the
- // match distance.
probs = coder->dist_slot[get_dist_state(len)];
symbol = 1;
-#ifdef HAVE_SMALL
case SEQ_DIST_SLOT:
do {
- rc_bit(probs[symbol], , , SEQ_DIST_SLOT);
+ rc_bit_safe(probs[symbol], , , SEQ_DIST_SLOT);
} while (symbol < DIST_SLOTS);
-#else
- rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT0);
- rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT1);
- rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT2);
- rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT3);
- rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT4);
- rc_bit_case(probs[symbol], , , SEQ_DIST_SLOT5);
-#endif
- // Get rid of the highest bit that was needed for
- // indexing of the probability array.
+
symbol -= DIST_SLOTS;
assert(symbol <= 63);
if (symbol < DIST_MODEL_START) {
- // Match distances [0, 3] have only two bits.
rep0 = symbol;
} else {
- // Decode the lowest [1, 29] bits of
- // the match distance.
limit = (symbol >> 1) - 1;
assert(limit >= 1 && limit <= 30);
rep0 = 2 + (symbol & 1);
if (symbol < DIST_MODEL_END) {
- // Prepare to decode the low bits for
- // a distance of [4, 127].
assert(limit <= 5);
rep0 <<= limit;
assert(rep0 <= 96);
@@ -607,95 +809,36 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
symbol = 1;
offset = 0;
case SEQ_DIST_MODEL:
-#ifdef HAVE_SMALL
do {
- rc_bit(probs[symbol], ,
+ rc_bit_safe(probs[symbol], ,
rep0 += 1U << offset,
SEQ_DIST_MODEL);
} while (++offset < limit);
-#else
- switch (limit) {
- case 5:
- assert(offset == 0);
- rc_bit(probs[symbol], ,
- rep0 += 1U,
- SEQ_DIST_MODEL);
- ++offset;
- --limit;
- case 4:
- rc_bit(probs[symbol], ,
- rep0 += 1U << offset,
- SEQ_DIST_MODEL);
- ++offset;
- --limit;
- case 3:
- rc_bit(probs[symbol], ,
- rep0 += 1U << offset,
- SEQ_DIST_MODEL);
- ++offset;
- --limit;
- case 2:
- rc_bit(probs[symbol], ,
- rep0 += 1U << offset,
- SEQ_DIST_MODEL);
- ++offset;
- --limit;
- case 1:
- // We need "symbol" only for
- // indexing the probability
- // array, thus we can use
- // rc_bit_last() here to omit
- // the unneeded updating of
- // "symbol".
- rc_bit_last(probs[symbol], ,
- rep0 += 1U << offset,
- SEQ_DIST_MODEL);
- }
-#endif
} else {
- // The distance is >= 128. Decode the
- // lower bits without probabilities
- // except the lowest four bits.
assert(symbol >= 14);
assert(limit >= 6);
limit -= ALIGN_BITS;
assert(limit >= 2);
case SEQ_DIRECT:
- // Not worth manual unrolling
- do {
- rc_direct(rep0, SEQ_DIRECT);
- } while (--limit > 0);
+ rc_direct_safe(rep0, limit,
+ SEQ_DIRECT);
- // Decode the lowest four bits using
- // probabilities.
rep0 <<= ALIGN_BITS;
- symbol = 1;
-#ifdef HAVE_SMALL
- offset = 0;
+ symbol = 0;
+ offset = 1;
case SEQ_ALIGN:
do {
- rc_bit(coder->pos_align[
- symbol], ,
- rep0 += 1U << offset,
+ rc_bit_last_safe(
+ coder->pos_align[
+ offset
+ + symbol],
+ ,
+ symbol += offset,
SEQ_ALIGN);
- } while (++offset < ALIGN_BITS);
-#else
- case SEQ_ALIGN0:
- rc_bit(coder->pos_align[symbol], ,
- rep0 += 1, SEQ_ALIGN0);
- case SEQ_ALIGN1:
- rc_bit(coder->pos_align[symbol], ,
- rep0 += 2, SEQ_ALIGN1);
- case SEQ_ALIGN2:
- rc_bit(coder->pos_align[symbol], ,
- rep0 += 4, SEQ_ALIGN2);
- case SEQ_ALIGN3:
- // Like in SEQ_DIST_MODEL, we don't
- // need "symbol" for anything else
- // than indexing the probability array.
- rc_bit_last(coder->pos_align[symbol], ,
- rep0 += 8, SEQ_ALIGN3);
-#endif
+ offset <<= 1;
+ } while (offset < ALIGN_SIZE);
+
+ rep0 += symbol;
if (rep0 == UINT32_MAX) {
// End of payload marker was
@@ -710,6 +853,9 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
// that EOPM might be used
// (it's not allowed in
// LZMA2).
+#ifndef HAVE_SMALL
+eopm:
+#endif
if (!eopm_is_valid) {
ret = LZMA_DATA_ERROR;
goto out;
@@ -718,7 +864,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
case SEQ_EOPM:
// LZMA1 stream with
// end-of-payload marker.
- rc_normalize(SEQ_EOPM);
+ rc_normalize_safe(SEQ_EOPM);
ret = rc_is_finished(rc)
? LZMA_STREAM_END
: LZMA_DATA_ERROR;
@@ -727,36 +873,30 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
}
}
- // Validate the distance we just decoded.
if (unlikely(!dict_is_distance_valid(&dict, rep0))) {
ret = LZMA_DATA_ERROR;
goto out;
}
} else {
+ /////////////////////
+ // Repeated match. //
+ /////////////////////
+
rc_update_1(coder->is_rep[state]);
- // Repeated match
- //
- // The match distance is a value that we have had
- // earlier. The latest four match distances are
- // available as rep0, rep1, rep2 and rep3. We will
- // now decode which of them is the new distance.
- //
- // There cannot be a match if we haven't produced
- // any output, so check that first.
if (unlikely(!dict_is_distance_valid(&dict, 0))) {
ret = LZMA_DATA_ERROR;
goto out;
}
case SEQ_IS_REP0:
- rc_if_0(coder->is_rep0[state], SEQ_IS_REP0) {
+ rc_if_0_safe(coder->is_rep0[state], SEQ_IS_REP0) {
rc_update_0(coder->is_rep0[state]);
- // The distance is rep0.
case SEQ_IS_REP0_LONG:
- rc_if_0(coder->is_rep0_long[state][pos_state],
+ rc_if_0_safe(coder->is_rep0_long
+ [state][pos_state],
SEQ_IS_REP0_LONG) {
rc_update_0(coder->is_rep0_long[
state][pos_state]);
@@ -764,8 +904,9 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
update_short_rep(state);
case SEQ_SHORTREP:
- if (unlikely(dict_put(&dict, dict_get(
- &dict, rep0)))) {
+ if (dict_put_safe(&dict,
+ dict_get(&dict,
+ rep0))) {
coder->sequence = SEQ_SHORTREP;
goto out;
}
@@ -773,8 +914,6 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
continue;
}
- // Repeating more than one byte at
- // distance of rep0.
rc_update_1(coder->is_rep0_long[
state][pos_state]);
@@ -782,11 +921,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
rc_update_1(coder->is_rep0[state]);
case SEQ_IS_REP1:
- // The distance is rep1, rep2 or rep3. Once
- // we find out which one of these three, it
- // is stored to rep0 and rep1, rep2 and rep3
- // are updated accordingly.
- rc_if_0(coder->is_rep1[state], SEQ_IS_REP1) {
+ rc_if_0_safe(coder->is_rep1[state], SEQ_IS_REP1) {
rc_update_0(coder->is_rep1[state]);
const uint32_t distance = rep1;
@@ -796,7 +931,7 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
} else {
rc_update_1(coder->is_rep1[state]);
case SEQ_IS_REP2:
- rc_if_0(coder->is_rep2[state],
+ rc_if_0_safe(coder->is_rep2[state],
SEQ_IS_REP2) {
rc_update_0(coder->is_rep2[
state]);
@@ -821,7 +956,6 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
update_long_rep(state);
- // Decode the length of the repeated match.
len_decode(len, coder->rep_len_decoder,
pos_state, SEQ_REP_LEN);
}
@@ -830,13 +964,10 @@ lzma_decode(void *coder_ptr, lzma_dict *restrict dictptr,
// Repeat from history buffer. //
/////////////////////////////////
- // The length is always between these limits. There is no way
- // to trigger the algorithm to set len outside this range.
assert(len >= MATCH_LEN_MIN);
assert(len <= MATCH_LEN_MAX);
case SEQ_COPY:
- // Repeat len bytes from distance of rep0.
if (unlikely(dict_repeat(&dict, rep0, &len))) {
coder->sequence = SEQ_COPY;
goto out;
@@ -890,7 +1021,6 @@ out:
}
-
static void
lzma_decoder_uncompressed(void *coder_ptr, lzma_vli uncompressed_size,
bool allow_eopm)
@@ -917,7 +1047,7 @@ lzma_decoder_reset(void *coder_ptr, const void *opt)
literal_init(coder->literal, options->lc, options->lp);
coder->literal_context_bits = options->lc;
- coder->literal_pos_mask = (1U << options->lp) - 1;
+ coder->literal_mask = literal_mask_calc(options->lc, options->lp);
// State
coder->state = STATE_LIT_LIT;
diff --git a/src/liblzma/lzma/lzma_decoder.h b/src/liblzma/lzma/lzma_decoder.h
index 1427bc2..9730f56 100644
--- a/src/liblzma/lzma/lzma_decoder.h
+++ b/src/liblzma/lzma/lzma_decoder.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma_decoder.h
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZMA_DECODER_H
diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c
index 559c63e..543ca32 100644
--- a/src/liblzma/lzma/lzma_encoder.c
+++ b/src/liblzma/lzma/lzma_encoder.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma_encoder.c
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#include "lzma2_encoder.h"
@@ -49,24 +48,24 @@ literal(lzma_lzma1_encoder *coder, lzma_mf *mf, uint32_t position)
const uint8_t cur_byte = mf->buffer[
mf->read_pos - mf->read_ahead];
probability *subcoder = literal_subcoder(coder->literal,
- coder->literal_context_bits, coder->literal_pos_mask,
+ coder->literal_context_bits, coder->literal_mask,
position, mf->buffer[mf->read_pos - mf->read_ahead - 1]);
if (is_literal_state(coder->state)) {
// Previous LZMA-symbol was a literal. Encode a normal
// literal without a match byte.
+ update_literal_normal(coder->state);
rc_bittree(&coder->rc, subcoder, 8, cur_byte);
} else {
// Previous LZMA-symbol was a match. Use the last byte of
// the match as a "match byte". That is, compare the bits
// of the current literal and the match byte.
+ update_literal_matched(coder->state);
const uint8_t match_byte = mf->buffer[
mf->read_pos - coder->reps[0] - 1
- mf->read_ahead];
literal_matched(&coder->rc, subcoder, match_byte, cur_byte);
}
-
- update_literal(coder->state);
}
@@ -283,7 +282,7 @@ encode_init(lzma_lzma1_encoder *coder, lzma_mf *mf)
mf_skip(mf, 1);
mf->read_ahead = 0;
rc_bit(&coder->rc, &coder->is_match[0][0], 0);
- rc_bittree(&coder->rc, coder->literal[0], 8, mf->buffer[0]);
+ rc_bittree(&coder->rc, coder->literal + 0, 8, mf->buffer[0]);
++coder->uncomp_size;
}
@@ -535,7 +534,7 @@ lzma_lzma_encoder_reset(lzma_lzma1_encoder *coder,
coder->pos_mask = (1U << options->pb) - 1;
coder->literal_context_bits = options->lc;
- coder->literal_pos_mask = (1U << options->lp) - 1;
+ coder->literal_mask = literal_mask_calc(options->lc, options->lp);
// Range coder
rc_reset(&coder->rc);
@@ -712,6 +711,9 @@ static lzma_ret
lzma_encoder_init(lzma_lz_encoder *lz, const lzma_allocator *allocator,
lzma_vli id, const void *options, lzma_lz_options *lz_options)
{
+ if (options == NULL)
+ return LZMA_PROG_ERROR;
+
lz->code = &lzma_encode;
lz->set_out_limit = &lzma_lzma_set_out_limit;
return lzma_lzma_encoder_create(
diff --git a/src/liblzma/lzma/lzma_encoder.h b/src/liblzma/lzma/lzma_encoder.h
index 84d8c91..e8ae807 100644
--- a/src/liblzma/lzma/lzma_encoder.h
+++ b/src/liblzma/lzma/lzma_encoder.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma_encoder.h
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZMA_ENCODER_H
diff --git a/src/liblzma/lzma/lzma_encoder_optimum_fast.c b/src/liblzma/lzma/lzma_encoder_optimum_fast.c
index 6c53d2b..0f063d5 100644
--- a/src/liblzma/lzma/lzma_encoder_optimum_fast.c
+++ b/src/liblzma/lzma/lzma_encoder_optimum_fast.c
@@ -1,12 +1,11 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma_encoder_optimum_fast.c
//
// Author: Igor Pavlov
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#include "lzma_encoder_private.h"
diff --git a/src/liblzma/lzma/lzma_encoder_optimum_normal.c b/src/liblzma/lzma/lzma_encoder_optimum_normal.c
index 101c8d4..a6c0398 100644
--- a/src/liblzma/lzma/lzma_encoder_optimum_normal.c
+++ b/src/liblzma/lzma/lzma_encoder_optimum_normal.c
@@ -1,12 +1,11 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma_encoder_optimum_normal.c
//
// Author: Igor Pavlov
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#include "lzma_encoder_private.h"
@@ -24,7 +23,7 @@ get_literal_price(const lzma_lzma1_encoder *const coder, const uint32_t pos,
uint32_t match_byte, uint32_t symbol)
{
const probability *const subcoder = literal_subcoder(coder->literal,
- coder->literal_context_bits, coder->literal_pos_mask,
+ coder->literal_context_bits, coder->literal_mask,
pos, prev_byte);
uint32_t price = 0;
diff --git a/src/liblzma/lzma/lzma_encoder_presets.c b/src/liblzma/lzma/lzma_encoder_presets.c
index 711df02..e53483f 100644
--- a/src/liblzma/lzma/lzma_encoder_presets.c
+++ b/src/liblzma/lzma/lzma_encoder_presets.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma_encoder_presets.c
@@ -6,9 +8,6 @@
//
// Author: Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#include "common.h"
diff --git a/src/liblzma/lzma/lzma_encoder_private.h b/src/liblzma/lzma/lzma_encoder_private.h
index b228c57..eeea5e9 100644
--- a/src/liblzma/lzma/lzma_encoder_private.h
+++ b/src/liblzma/lzma/lzma_encoder_private.h
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: 0BSD
+
///////////////////////////////////////////////////////////////////////////////
//
/// \file lzma_encoder_private.h
@@ -6,9 +8,6 @@
// Authors: Igor Pavlov
// Lasse Collin
//
-// This file has been put into the public domain.
-// You can do whatever you want with this file.
-//
///////////////////////////////////////////////////////////////////////////////
#ifndef LZMA_LZMA_ENCODER_PRIVATE_H
@@ -116,10 +115,10 @@ struct lzma_lzma1_encoder_s {
uint32_t pos_mask; ///< (1 << pos_bits) - 1
uint32_t literal_context_bits;
- uint32_t literal_pos_mask;
+ uint32_t literal_mask;
// These are the same as in lzma_decoder.c. See comments there.
- probability literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE];
+ probability literal[LITERAL_CODERS_MAX * LITERAL_CODER_SIZE];
probability is_match[STATES][POS_STATES_MAX];
probability is_rep[STATES];
probability is_rep0[STATES];