summary refs log tree commit diff stats
path: root/src/liblzma/common
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 03:10:08 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 03:10:08 +0000
commit 5262a872f308b3b584c97d621992fb3877e392b8 (patch)
tree b956c322376141abeafe639bd72cfecdf16954b5 /src/liblzma/common
parent Initial commit. (diff)
download xz-utils-5262a872f308b3b584c97d621992fb3877e392b8.tar.xz
xz-utils-5262a872f308b3b584c97d621992fb3877e392b8.zip
Adding upstream version 5.6.1+really5.4.5. upstream/5.6.1+really5.4.5
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/liblzma/common')
-rw-r--r-- src/liblzma/common/Makefile.inc 104
-rw-r--r-- src/liblzma/common/alone_decoder.c 249
-rw-r--r-- src/liblzma/common/alone_decoder.h 23
-rw-r--r-- src/liblzma/common/alone_encoder.c 152
-rw-r--r-- src/liblzma/common/auto_decoder.c 206
-rw-r--r-- src/liblzma/common/block_buffer_decoder.c 80
-rw-r--r-- src/liblzma/common/block_buffer_encoder.c 355
-rw-r--r-- src/liblzma/common/block_buffer_encoder.h 24
-rw-r--r-- src/liblzma/common/block_decoder.c 289
-rw-r--r-- src/liblzma/common/block_decoder.h 22
-rw-r--r-- src/liblzma/common/block_encoder.c 227
-rw-r--r-- src/liblzma/common/block_encoder.h 47
-rw-r--r-- src/liblzma/common/block_header_decoder.c 115
-rw-r--r-- src/liblzma/common/block_header_encoder.c 132
-rw-r--r-- src/liblzma/common/block_util.c 90
-rw-r--r-- src/liblzma/common/common.c 481
-rw-r--r-- src/liblzma/common/common.h 413
-rw-r--r-- src/liblzma/common/easy_buffer_encoder.c 27
-rw-r--r-- src/liblzma/common/easy_decoder_memusage.c 24
-rw-r--r-- src/liblzma/common/easy_encoder.c 24
-rw-r--r-- src/liblzma/common/easy_encoder_memusage.c 24
-rw-r--r-- src/liblzma/common/easy_preset.c 27
-rw-r--r-- src/liblzma/common/easy_preset.h 32
-rw-r--r-- src/liblzma/common/file_info.c 855
-rw-r--r-- src/liblzma/common/filter_buffer_decoder.c 88
-rw-r--r-- src/liblzma/common/filter_buffer_encoder.c 55
-rw-r--r-- src/liblzma/common/filter_common.c 385
-rw-r--r-- src/liblzma/common/filter_common.h 51
-rw-r--r-- src/liblzma/common/filter_decoder.c 198
-rw-r--r-- src/liblzma/common/filter_decoder.h 23
-rw-r--r-- src/liblzma/common/filter_encoder.c 308
-rw-r--r-- src/liblzma/common/filter_encoder.h 27
-rw-r--r-- src/liblzma/common/filter_flags_decoder.c 46
-rw-r--r-- src/liblzma/common/filter_flags_encoder.c 56
-rw-r--r-- src/liblzma/common/hardware_cputhreads.c 34
-rw-r--r-- src/liblzma/common/hardware_physmem.c 25
-rw-r--r-- src/liblzma/common/index.c 1269
-rw-r--r-- src/liblzma/common/index.h 81
-rw-r--r-- src/liblzma/common/index_decoder.c 362
-rw-r--r-- src/liblzma/common/index_decoder.h 25
-rw-r--r-- src/liblzma/common/index_encoder.c 263
-rw-r--r-- src/liblzma/common/index_encoder.h 23
-rw-r--r-- src/liblzma/common/index_hash.c 343
-rw-r--r-- src/liblzma/common/lzip_decoder.c 418
-rw-r--r-- src/liblzma/common/lzip_decoder.h 22
-rw-r--r-- src/liblzma/common/memcmplen.h 173
-rw-r--r-- src/liblzma/common/microlzma_decoder.c 221
-rw-r--r-- src/liblzma/common/microlzma_encoder.c 141
-rw-r--r-- src/liblzma/common/outqueue.c 287
-rw-r--r-- src/liblzma/common/outqueue.h 254
-rw-r--r-- src/liblzma/common/stream_buffer_decoder.c 91
-rw-r--r-- src/liblzma/common/stream_buffer_encoder.c 142
-rw-r--r-- src/liblzma/common/stream_decoder.c 474
-rw-r--r-- src/liblzma/common/stream_decoder.h 22
-rw-r--r-- src/liblzma/common/stream_decoder_mt.c 2018
-rw-r--r-- src/liblzma/common/stream_encoder.c 355
-rw-r--r-- src/liblzma/common/stream_encoder_mt.c 1283
-rw-r--r-- src/liblzma/common/stream_flags_common.c 47
-rw-r--r-- src/liblzma/common/stream_flags_common.h 36
-rw-r--r-- src/liblzma/common/stream_flags_decoder.c 88
-rw-r--r-- src/liblzma/common/stream_flags_encoder.c 86
-rw-r--r-- src/liblzma/common/string_conversion.c 1324
-rw-r--r-- src/liblzma/common/vli_decoder.c 86
-rw-r--r-- src/liblzma/common/vli_encoder.c 69
-rw-r--r-- src/liblzma/common/vli_size.c 30
65 files changed, 15351 insertions, 0 deletions
diff --git a/src/liblzma/common/Makefile.inc b/src/liblzma/common/Makefile.inc
new file mode 100644
index 0000000..f0276a2
--- /dev/null
+++ b/src/liblzma/common/Makefile.inc
@@ -0,0 +1,104 @@
+##
+## Author: Lasse Collin
+##
+## This file has been put into the public domain.
+## You can do whatever you want with this file.
+##
+
+liblzma_la_SOURCES += \
+ common/common.c \
+ common/common.h \
+ common/memcmplen.h \
+ common/block_util.c \
+ common/easy_preset.c \
+ common/easy_preset.h \
+ common/filter_common.c \
+ common/filter_common.h \
+ common/hardware_physmem.c \
+ common/index.c \
+ common/index.h \
+ common/stream_flags_common.c \
+ common/stream_flags_common.h \
+ common/string_conversion.c \
+ common/vli_size.c
+
+if COND_THREADS
+liblzma_la_SOURCES += \
+ common/hardware_cputhreads.c \
+ common/outqueue.c \
+ common/outqueue.h
+endif
+
+if COND_MAIN_ENCODER
+liblzma_la_SOURCES += \
+ common/alone_encoder.c \
+ common/block_buffer_encoder.c \
+ common/block_buffer_encoder.h \
+ common/block_encoder.c \
+ common/block_encoder.h \
+ common/block_header_encoder.c \
+ common/easy_buffer_encoder.c \
+ common/easy_encoder.c \
+ common/easy_encoder_memusage.c \
+ common/filter_buffer_encoder.c \
+ common/filter_encoder.c \
+ common/filter_encoder.h \
+ common/filter_flags_encoder.c \
+ common/index_encoder.c \
+ common/index_encoder.h \
+ common/stream_buffer_encoder.c \
+ common/stream_encoder.c \
+ common/stream_flags_encoder.c \
+ common/vli_encoder.c
+
+if COND_THREADS
+liblzma_la_SOURCES += \
+ common/stream_encoder_mt.c
+endif
+
+if COND_MICROLZMA
+liblzma_la_SOURCES += \
+ common/microlzma_encoder.c
+endif
+endif
+
+if COND_MAIN_DECODER
+liblzma_la_SOURCES += \
+ common/alone_decoder.c \
+ common/alone_decoder.h \
+ common/auto_decoder.c \
+ common/block_buffer_decoder.c \
+ common/block_decoder.c \
+ common/block_decoder.h \
+ common/block_header_decoder.c \
+ common/easy_decoder_memusage.c \
+ common/file_info.c \
+ common/filter_buffer_decoder.c \
+ common/filter_decoder.c \
+ common/filter_decoder.h \
+ common/filter_flags_decoder.c \
+ common/index_decoder.c \
+ common/index_decoder.h \
+ common/index_hash.c \
+ common/stream_buffer_decoder.c \
+ common/stream_decoder.c \
+ common/stream_decoder.h \
+ common/stream_flags_decoder.c \
+ common/vli_decoder.c
+
+if COND_THREADS
+liblzma_la_SOURCES += \
+ common/stream_decoder_mt.c
+endif
+
+if COND_MICROLZMA
+liblzma_la_SOURCES += \
+ common/microlzma_decoder.c
+endif
+
+if COND_LZIP_DECODER
+liblzma_la_SOURCES += \
+ common/lzip_decoder.c \
+ common/lzip_decoder.h
+endif
+endif
diff --git a/src/liblzma/common/alone_decoder.c b/src/liblzma/common/alone_decoder.c
new file mode 100644
index 0000000..1dc85ba
--- /dev/null
+++ b/src/liblzma/common/alone_decoder.c
@@ -0,0 +1,249 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file alone_decoder.c
+/// \brief Decoder for LZMA_Alone files
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "alone_decoder.h"
+#include "lzma_decoder.h"
+#include "lz_decoder.h"
+
+
+typedef struct {
+ lzma_next_coder next;
+
+ enum {
+ SEQ_PROPERTIES,
+ SEQ_DICTIONARY_SIZE,
+ SEQ_UNCOMPRESSED_SIZE,
+ SEQ_CODER_INIT,
+ SEQ_CODE,
+ } sequence;
+
+ /// If true, reject files that are unlikely to be .lzma files.
+ /// If false, more non-.lzma files get accepted and will give
+ /// LZMA_DATA_ERROR either immediately or after a few output bytes.
+ bool picky;
+
+ /// Position in the header fields
+ size_t pos;
+
+ /// Uncompressed size decoded from the header
+ lzma_vli uncompressed_size;
+
+ /// Memory usage limit
+ uint64_t memlimit;
+
+ /// Amount of memory actually needed (only an estimate)
+ uint64_t memusage;
+
+ /// Options decoded from the header needed to initialize
+ /// the LZMA decoder
+ lzma_options_lzma options;
+} lzma_alone_coder;
+
+
+static lzma_ret
+alone_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size,
+ lzma_action action)
+{
+ lzma_alone_coder *coder = coder_ptr;
+
+ while (*out_pos < out_size
+ && (coder->sequence == SEQ_CODE || *in_pos < in_size))
+ switch (coder->sequence) {
+ case SEQ_PROPERTIES:
+ if (lzma_lzma_lclppb_decode(&coder->options, in[*in_pos]))
+ return LZMA_FORMAT_ERROR;
+
+ coder->sequence = SEQ_DICTIONARY_SIZE;
+ ++*in_pos;
+ break;
+
+ case SEQ_DICTIONARY_SIZE:
+ coder->options.dict_size
+ |= (size_t)(in[*in_pos]) << (coder->pos * 8);
+
+ if (++coder->pos == 4) {
+ if (coder->picky && coder->options.dict_size
+ != UINT32_MAX) {
+ // A hack to ditch tons of false positives:
+ // We allow only dictionary sizes that are
+ // 2^n or 2^n + 2^(n-1). LZMA_Alone created
+ // only files with 2^n, but accepts any
+ // dictionary size.
+ uint32_t d = coder->options.dict_size - 1;
+ d |= d >> 2;
+ d |= d >> 3;
+ d |= d >> 4;
+ d |= d >> 8;
+ d |= d >> 16;
+ ++d;
+
+ if (d != coder->options.dict_size)
+ return LZMA_FORMAT_ERROR;
+ }
+
+ coder->pos = 0;
+ coder->sequence = SEQ_UNCOMPRESSED_SIZE;
+ }
+
+ ++*in_pos;
+ break;
+
+ case SEQ_UNCOMPRESSED_SIZE:
+ coder->uncompressed_size
+ |= (lzma_vli)(in[*in_pos]) << (coder->pos * 8);
+ ++*in_pos;
+ if (++coder->pos < 8)
+ break;
+
+ // Another hack to ditch false positives: Assume that
+ // if the uncompressed size is known, it must be less
+ // than 256 GiB.
+ //
+ // FIXME? Without picky we allow > LZMA_VLI_MAX which doesn't
+ // really matter in this specific situation (> LZMA_VLI_MAX is
+ // safe in the LZMA decoder) but it's somewhat weird still.
+ if (coder->picky
+ && coder->uncompressed_size != LZMA_VLI_UNKNOWN
+ && coder->uncompressed_size
+ >= (LZMA_VLI_C(1) << 38))
+ return LZMA_FORMAT_ERROR;
+
+ // Use LZMA_FILTER_LZMA1EXT features to specify the
+ // uncompressed size and that the end marker is allowed
+ // even when the uncompressed size is known. Both the .lzma
+ // header and LZMA1EXT use UINT64_MAX to indicate that the
+ // size is unknown.
+ coder->options.ext_flags = LZMA_LZMA1EXT_ALLOW_EOPM;
+ lzma_set_ext_size(coder->options, coder->uncompressed_size);
+
+ // Calculate the memory usage so that it is ready
+ // for SEQ_CODER_INIT.
+ coder->memusage = lzma_lzma_decoder_memusage(&coder->options)
+ + LZMA_MEMUSAGE_BASE;
+
+ coder->pos = 0;
+ coder->sequence = SEQ_CODER_INIT;
+
+ // Fall through
+
+ case SEQ_CODER_INIT: {
+ if (coder->memusage > coder->memlimit)
+ return LZMA_MEMLIMIT_ERROR;
+
+ lzma_filter_info filters[2] = {
+ {
+ .id = LZMA_FILTER_LZMA1EXT,
+ .init = &lzma_lzma_decoder_init,
+ .options = &coder->options,
+ }, {
+ .init = NULL,
+ }
+ };
+
+ return_if_error(lzma_next_filter_init(&coder->next,
+ allocator, filters));
+
+ coder->sequence = SEQ_CODE;
+ break;
+ }
+
+ case SEQ_CODE: {
+ return coder->next.code(coder->next.coder,
+ allocator, in, in_pos, in_size,
+ out, out_pos, out_size, action);
+ }
+
+ default:
+ return LZMA_PROG_ERROR;
+ }
+
+ return LZMA_OK;
+}
+
+
+static void
+alone_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_alone_coder *coder = coder_ptr;
+ lzma_next_end(&coder->next, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_ret
+alone_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
+ uint64_t *old_memlimit, uint64_t new_memlimit)
+{
+ lzma_alone_coder *coder = coder_ptr;
+
+ *memusage = coder->memusage;
+ *old_memlimit = coder->memlimit;
+
+ if (new_memlimit != 0) {
+ if (new_memlimit < coder->memusage)
+ return LZMA_MEMLIMIT_ERROR;
+
+ coder->memlimit = new_memlimit;
+ }
+
+ return LZMA_OK;
+}
+
+
+extern lzma_ret
+lzma_alone_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t memlimit, bool picky)
+{
+ lzma_next_coder_init(&lzma_alone_decoder_init, next, allocator);
+
+ lzma_alone_coder *coder = next->coder;
+
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_alone_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &alone_decode;
+ next->end = &alone_decoder_end;
+ next->memconfig = &alone_decoder_memconfig;
+ coder->next = LZMA_NEXT_CODER_INIT;
+ }
+
+ coder->sequence = SEQ_PROPERTIES;
+ coder->picky = picky;
+ coder->pos = 0;
+ coder->options.dict_size = 0;
+ coder->options.preset_dict = NULL;
+ coder->options.preset_dict_size = 0;
+ coder->uncompressed_size = 0;
+ coder->memlimit = my_max(1, memlimit);
+ coder->memusage = LZMA_MEMUSAGE_BASE;
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_alone_decoder(lzma_stream *strm, uint64_t memlimit)
+{
+ lzma_next_strm_init(lzma_alone_decoder_init, strm, memlimit, false);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/alone_decoder.h b/src/liblzma/common/alone_decoder.h
new file mode 100644
index 0000000..dfa031a
--- /dev/null
+++ b/src/liblzma/common/alone_decoder.h
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file alone_decoder.h
+/// \brief Decoder for LZMA_Alone files
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_ALONE_DECODER_H
+#define LZMA_ALONE_DECODER_H
+
+#include "common.h"
+
+
+extern lzma_ret lzma_alone_decoder_init(
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t memlimit, bool picky);
+
+#endif
diff --git a/src/liblzma/common/alone_encoder.c b/src/liblzma/common/alone_encoder.c
new file mode 100644
index 0000000..7d3812f
--- /dev/null
+++ b/src/liblzma/common/alone_encoder.c
@@ -0,0 +1,152 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file alone_encoder.c
+/// \brief Encoder for LZMA_Alone files
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "lzma_encoder.h"
+
+
+#define ALONE_HEADER_SIZE (1 + 4 + 8)
+
+
+typedef struct {
+ lzma_next_coder next;
+
+ enum {
+ SEQ_HEADER,
+ SEQ_CODE,
+ } sequence;
+
+ size_t header_pos;
+ uint8_t header[ALONE_HEADER_SIZE];
+} lzma_alone_coder;
+
+
+static lzma_ret
+alone_encode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size,
+ lzma_action action)
+{
+ lzma_alone_coder *coder = coder_ptr;
+
+ while (*out_pos < out_size)
+ switch (coder->sequence) {
+ case SEQ_HEADER:
+ lzma_bufcpy(coder->header, &coder->header_pos,
+ ALONE_HEADER_SIZE,
+ out, out_pos, out_size);
+ if (coder->header_pos < ALONE_HEADER_SIZE)
+ return LZMA_OK;
+
+ coder->sequence = SEQ_CODE;
+ break;
+
+ case SEQ_CODE:
+ return coder->next.code(coder->next.coder,
+ allocator, in, in_pos, in_size,
+ out, out_pos, out_size, action);
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ return LZMA_OK;
+}
+
+
+static void
+alone_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_alone_coder *coder = coder_ptr;
+ lzma_next_end(&coder->next, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_ret
+alone_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_options_lzma *options)
+{
+ lzma_next_coder_init(&alone_encoder_init, next, allocator);
+
+ lzma_alone_coder *coder = next->coder;
+
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_alone_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &alone_encode;
+ next->end = &alone_encoder_end;
+ coder->next = LZMA_NEXT_CODER_INIT;
+ }
+
+ // Basic initializations
+ coder->sequence = SEQ_HEADER;
+ coder->header_pos = 0;
+
+ // Encode the header:
+ // - Properties (1 byte)
+ if (lzma_lzma_lclppb_encode(options, coder->header))
+ return LZMA_OPTIONS_ERROR;
+
+ // - Dictionary size (4 bytes)
+ if (options->dict_size < LZMA_DICT_SIZE_MIN)
+ return LZMA_OPTIONS_ERROR;
+
+ // Round up to the next 2^n or 2^n + 2^(n - 1), whichever comes
+ // first, unless it is UINT32_MAX. While the header would
+ // allow any 32-bit integer, we do this so that the decoder of liblzma
+ // accepts the resulting files.
+ uint32_t d = options->dict_size - 1;
+ d |= d >> 2;
+ d |= d >> 3;
+ d |= d >> 4;
+ d |= d >> 8;
+ d |= d >> 16;
+ if (d != UINT32_MAX)
+ ++d;
+
+ write32le(coder->header + 1, d);
+
+ // - Uncompressed size (always unknown and using EOPM)
+ memset(coder->header + 1 + 4, 0xFF, 8);
+
+ // Initialize the LZMA encoder.
+ const lzma_filter_info filters[2] = {
+ {
+ .id = LZMA_FILTER_LZMA1,
+ .init = &lzma_lzma_encoder_init,
+ .options = (void *)(options),
+ }, {
+ .init = NULL,
+ }
+ };
+
+ return lzma_next_filter_init(&coder->next, allocator, filters);
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_alone_encoder(lzma_stream *strm, const lzma_options_lzma *options)
+{
+ lzma_next_strm_init(alone_encoder_init, strm, options);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/auto_decoder.c b/src/liblzma/common/auto_decoder.c
new file mode 100644
index 0000000..2a5c089
--- /dev/null
+++ b/src/liblzma/common/auto_decoder.c
@@ -0,0 +1,206 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file auto_decoder.c
+/// \brief Autodetect between .xz, .lzma (LZMA_Alone), and .lz (lzip)
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stream_decoder.h"
+#include "alone_decoder.h"
+#ifdef HAVE_LZIP_DECODER
+# include "lzip_decoder.h"
+#endif
+
+
+typedef struct {
+ /// .xz Stream decoder, LZMA_Alone decoder, or lzip decoder
+ lzma_next_coder next;
+
+ uint64_t memlimit;
+ uint32_t flags;
+
+ enum {
+ SEQ_INIT,
+ SEQ_CODE,
+ SEQ_FINISH,
+ } sequence;
+} lzma_auto_coder;
+
+
+static lzma_ret
+auto_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_auto_coder *coder = coder_ptr;
+
+ switch (coder->sequence) {
+ case SEQ_INIT:
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ // Update the sequence now, because we want to continue from
+ // SEQ_CODE even if we return some LZMA_*_CHECK.
+ coder->sequence = SEQ_CODE;
+
+ // Detect the file format. .xz files start with 0xFD which
+ // cannot be the first byte of .lzma (LZMA_Alone) format.
+ // The .lz format starts with 0x4C which could be the
+ // first byte of a .lzma file but luckily it would mean
+ // lc/lp/pb being 4/3/1 which liblzma doesn't support because
+ // lc + lp > 4. So using just 0x4C to detect .lz is OK here.
+ if (in[*in_pos] == 0xFD) {
+ return_if_error(lzma_stream_decoder_init(
+ &coder->next, allocator,
+ coder->memlimit, coder->flags));
+#ifdef HAVE_LZIP_DECODER
+ } else if (in[*in_pos] == 0x4C) {
+ return_if_error(lzma_lzip_decoder_init(
+ &coder->next, allocator,
+ coder->memlimit, coder->flags));
+#endif
+ } else {
+ return_if_error(lzma_alone_decoder_init(&coder->next,
+ allocator, coder->memlimit, true));
+
+ // If the application wants to know about missing
+ // integrity check or about the check in general, we
+ // need to handle it here, because LZMA_Alone decoder
+ // doesn't accept any flags.
+ if (coder->flags & LZMA_TELL_NO_CHECK)
+ return LZMA_NO_CHECK;
+
+ if (coder->flags & LZMA_TELL_ANY_CHECK)
+ return LZMA_GET_CHECK;
+ }
+
+ // Fall through
+
+ case SEQ_CODE: {
+ const lzma_ret ret = coder->next.code(
+ coder->next.coder, allocator,
+ in, in_pos, in_size,
+ out, out_pos, out_size, action);
+ if (ret != LZMA_STREAM_END
+ || (coder->flags & LZMA_CONCATENATED) == 0)
+ return ret;
+
+ coder->sequence = SEQ_FINISH;
+ }
+
+ // Fall through
+
+ case SEQ_FINISH:
+ // When LZMA_CONCATENATED was used and we were decoding
+ // a LZMA_Alone file, we need to check that there is no
+ // trailing garbage and wait for LZMA_FINISH.
+ if (*in_pos < in_size)
+ return LZMA_DATA_ERROR;
+
+ return action == LZMA_FINISH ? LZMA_STREAM_END : LZMA_OK;
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+}
+
+
+static void
+auto_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_auto_coder *coder = coder_ptr;
+ lzma_next_end(&coder->next, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_check
+auto_decoder_get_check(const void *coder_ptr)
+{
+ const lzma_auto_coder *coder = coder_ptr;
+
+ // It is LZMA_Alone if get_check is NULL.
+ return coder->next.get_check == NULL ? LZMA_CHECK_NONE
+ : coder->next.get_check(coder->next.coder);
+}
+
+
+static lzma_ret
+auto_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
+ uint64_t *old_memlimit, uint64_t new_memlimit)
+{
+ lzma_auto_coder *coder = coder_ptr;
+
+ lzma_ret ret;
+
+ if (coder->next.memconfig != NULL) {
+ ret = coder->next.memconfig(coder->next.coder,
+ memusage, old_memlimit, new_memlimit);
+ assert(*old_memlimit == coder->memlimit);
+ } else {
+ // No coder is configured yet. Use the base value as
+ // the current memory usage.
+ *memusage = LZMA_MEMUSAGE_BASE;
+ *old_memlimit = coder->memlimit;
+
+ ret = LZMA_OK;
+ if (new_memlimit != 0 && new_memlimit < *memusage)
+ ret = LZMA_MEMLIMIT_ERROR;
+ }
+
+ if (ret == LZMA_OK && new_memlimit != 0)
+ coder->memlimit = new_memlimit;
+
+ return ret;
+}
+
+
+static lzma_ret
+auto_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t memlimit, uint32_t flags)
+{
+ lzma_next_coder_init(&auto_decoder_init, next, allocator);
+
+ if (flags & ~LZMA_SUPPORTED_FLAGS)
+ return LZMA_OPTIONS_ERROR;
+
+ lzma_auto_coder *coder = next->coder;
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_auto_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &auto_decode;
+ next->end = &auto_decoder_end;
+ next->get_check = &auto_decoder_get_check;
+ next->memconfig = &auto_decoder_memconfig;
+ coder->next = LZMA_NEXT_CODER_INIT;
+ }
+
+ coder->memlimit = my_max(1, memlimit);
+ coder->flags = flags;
+ coder->sequence = SEQ_INIT;
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_auto_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
+{
+ lzma_next_strm_init(auto_decoder_init, strm, memlimit, flags);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/block_buffer_decoder.c b/src/liblzma/common/block_buffer_decoder.c
new file mode 100644
index 0000000..b0ded90
--- /dev/null
+++ b/src/liblzma/common/block_buffer_decoder.c
@@ -0,0 +1,80 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file block_buffer_decoder.c
+/// \brief Single-call .xz Block decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "block_decoder.h"
+
+
+extern LZMA_API(lzma_ret)
+lzma_block_buffer_decode(lzma_block *block, const lzma_allocator *allocator,
+ const uint8_t *in, size_t *in_pos, size_t in_size,
+ uint8_t *out, size_t *out_pos, size_t out_size)
+{
+ if (in_pos == NULL || (in == NULL && *in_pos != in_size)
+ || *in_pos > in_size || out_pos == NULL
+ || (out == NULL && *out_pos != out_size)
+ || *out_pos > out_size)
+ return LZMA_PROG_ERROR;
+
+ // Initialize the Block decoder.
+ lzma_next_coder block_decoder = LZMA_NEXT_CODER_INIT;
+ lzma_ret ret = lzma_block_decoder_init(
+ &block_decoder, allocator, block);
+
+ if (ret == LZMA_OK) {
+ // Save the positions so that we can restore them in case
+ // an error occurs.
+ const size_t in_start = *in_pos;
+ const size_t out_start = *out_pos;
+
+ // Do the actual decoding.
+ ret = block_decoder.code(block_decoder.coder, allocator,
+ in, in_pos, in_size, out, out_pos, out_size,
+ LZMA_FINISH);
+
+ if (ret == LZMA_STREAM_END) {
+ ret = LZMA_OK;
+ } else {
+ if (ret == LZMA_OK) {
+ // Either the input was truncated or the
+ // output buffer was too small.
+ assert(*in_pos == in_size
+ || *out_pos == out_size);
+
+ // If all the input was consumed, then the
+ // input is truncated, even if the output
+ // buffer is also full. This is because
+ // processing the last byte of the Block
+ // never produces output.
+ //
+ // NOTE: This assumption may break when new
+ // filters are added, if the end marker of
+ // the filter doesn't consume at least one
+ // complete byte.
+ if (*in_pos == in_size)
+ ret = LZMA_DATA_ERROR;
+ else
+ ret = LZMA_BUF_ERROR;
+ }
+
+ // Restore the positions.
+ *in_pos = in_start;
+ *out_pos = out_start;
+ }
+ }
+
+ // Free the decoder memory. This needs to be done even if
+ // initialization fails, because the internal API doesn't
+ // require the initialization function to free its memory on error.
+ lzma_next_end(&block_decoder, allocator);
+
+ return ret;
+}
diff --git a/src/liblzma/common/block_buffer_encoder.c b/src/liblzma/common/block_buffer_encoder.c
new file mode 100644
index 0000000..fdef02d
--- /dev/null
+++ b/src/liblzma/common/block_buffer_encoder.c
@@ -0,0 +1,355 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file block_buffer_encoder.c
+/// \brief Single-call .xz Block encoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "block_buffer_encoder.h"
+#include "block_encoder.h"
+#include "filter_encoder.h"
+#include "lzma2_encoder.h"
+#include "check.h"
+
+
+/// Estimate the maximum size of the Block Header and Check fields for
+/// a Block that uses LZMA2 uncompressed chunks. We could use
+/// lzma_block_header_size() but this is simpler.
+///
+/// Block Header Size + Block Flags + Compressed Size
+/// + Uncompressed Size + Filter Flags for LZMA2 + CRC32 + Check
+/// and round up to the next multiple of four to take Header Padding
+/// into account.
+#define HEADERS_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 3 + 4 \
+ + LZMA_CHECK_SIZE_MAX + 3) & ~3)
+
+
+static uint64_t
+lzma2_bound(uint64_t uncompressed_size)
+{
+ // Prevent integer overflow in overhead calculation.
+ if (uncompressed_size > COMPRESSED_SIZE_MAX)
+ return 0;
+
+ // Calculate the exact overhead of the LZMA2 headers: Divide
+ // uncompressed_size by LZMA2_CHUNK_MAX rounding up to get the number
+ // of chunks, multiply by the size of the per-chunk header, and add
+ // one byte for the end marker.
+ const uint64_t overhead = ((uncompressed_size + LZMA2_CHUNK_MAX - 1)
+ / LZMA2_CHUNK_MAX)
+ * LZMA2_HEADER_UNCOMPRESSED + 1;
+
+ // Catch the possible integer overflow.
+ if (COMPRESSED_SIZE_MAX - overhead < uncompressed_size)
+ return 0;
+
+ return uncompressed_size + overhead;
+}
+
+
+extern uint64_t
+lzma_block_buffer_bound64(uint64_t uncompressed_size)
+{
+ // If the data doesn't compress, we always use uncompressed
+ // LZMA2 chunks.
+ uint64_t lzma2_size = lzma2_bound(uncompressed_size);
+ if (lzma2_size == 0)
+ return 0;
+
+ // Take Block Padding into account.
+ lzma2_size = (lzma2_size + 3) & ~UINT64_C(3);
+
+ // No risk of integer overflow because lzma2_bound() already takes
+ // into account the size of the headers in the Block.
+ return HEADERS_BOUND + lzma2_size;
+}
+
+
+extern LZMA_API(size_t)
+lzma_block_buffer_bound(size_t uncompressed_size)
+{
+ uint64_t ret = lzma_block_buffer_bound64(uncompressed_size);
+
+#if SIZE_MAX < UINT64_MAX
+ // Catch the possible integer overflow on 32-bit systems.
+ if (ret > SIZE_MAX)
+ return 0;
+#endif
+
+ return ret;
+}
+
+
+static lzma_ret
+block_encode_uncompressed(lzma_block *block, const uint8_t *in, size_t in_size,
+ uint8_t *out, size_t *out_pos, size_t out_size)
+{
+ // Use LZMA2 uncompressed chunks. We wouldn't need a dictionary at
+ // all, but LZMA2 always requires a dictionary, so use the minimum
+ // value to minimize memory usage of the decoder.
+ lzma_options_lzma lzma2 = {
+ .dict_size = LZMA_DICT_SIZE_MIN,
+ };
+
+ lzma_filter filters[2];
+ filters[0].id = LZMA_FILTER_LZMA2;
+ filters[0].options = &lzma2;
+ filters[1].id = LZMA_VLI_UNKNOWN;
+
+ // Set the above filter options to *block temporarily so that we can
+ // encode the Block Header.
+ lzma_filter *filters_orig = block->filters;
+ block->filters = filters;
+
+ if (lzma_block_header_size(block) != LZMA_OK) {
+ block->filters = filters_orig;
+ return LZMA_PROG_ERROR;
+ }
+
+ // Check that there's enough output space. The caller has already
+ // set block->compressed_size to what lzma2_bound() has returned,
+ // so we can reuse that value. We know that compressed_size is a
+ // known valid VLI and header_size is a small value so their sum
+ // will never overflow.
+ assert(block->compressed_size == lzma2_bound(in_size));
+ if (out_size - *out_pos
+ < block->header_size + block->compressed_size) {
+ block->filters = filters_orig;
+ return LZMA_BUF_ERROR;
+ }
+
+ if (lzma_block_header_encode(block, out + *out_pos) != LZMA_OK) {
+ block->filters = filters_orig;
+ return LZMA_PROG_ERROR;
+ }
+
+ block->filters = filters_orig;
+ *out_pos += block->header_size;
+
+ // Encode the data using LZMA2 uncompressed chunks.
+ size_t in_pos = 0;
+ uint8_t control = 0x01; // Dictionary reset
+
+ while (in_pos < in_size) {
+ // Control byte: Indicate uncompressed chunk, of which
+ // the first resets the dictionary.
+ out[(*out_pos)++] = control;
+ control = 0x02; // No dictionary reset
+
+ // Size of the uncompressed chunk
+ const size_t copy_size
+ = my_min(in_size - in_pos, LZMA2_CHUNK_MAX);
+ out[(*out_pos)++] = (copy_size - 1) >> 8;
+ out[(*out_pos)++] = (copy_size - 1) & 0xFF;
+
+ // The actual data
+ assert(*out_pos + copy_size <= out_size);
+ memcpy(out + *out_pos, in + in_pos, copy_size);
+
+ in_pos += copy_size;
+ *out_pos += copy_size;
+ }
+
+ // End marker
+ out[(*out_pos)++] = 0x00;
+ assert(*out_pos <= out_size);
+
+ return LZMA_OK;
+}
+
+
+// Compress in[0 .. in_size) with the filter chain in block->filters and
+// write the Block Header followed by the Compressed Data to out[],
+// starting at *out_pos. Block Padding and the Check field are not
+// written by this function.
+//
+// On entry, block->compressed_size acts as an upper limit for the size of
+// the Compressed Data. On success it is replaced with the actual size.
+//
+// Returns LZMA_OK on success. LZMA_BUF_ERROR means that there wasn't
+// enough output space (or the block->compressed_size limit was hit) before
+// the encoder finished. Whenever something else than LZMA_OK is returned,
+// *out_pos has the same value it had when this function was called.
+static lzma_ret
+block_encode_normal(lzma_block *block, const lzma_allocator *allocator,
+ const uint8_t *in, size_t in_size,
+ uint8_t *out, size_t *out_pos, size_t out_size)
+{
+ // Find out the size of the Block Header.
+ return_if_error(lzma_block_header_size(block));
+
+ // Reserve space for the Block Header and skip it for now.
+ if (out_size - *out_pos <= block->header_size)
+ return LZMA_BUF_ERROR;
+
+ // Remember where the Block Header goes. This is also the position
+ // that *out_pos is restored to if encoding fails.
+ const size_t out_start = *out_pos;
+ *out_pos += block->header_size;
+
+ // Limit out_size so that we stop encoding if the output would grow
+ // bigger than what uncompressed Block would be.
+ if (out_size - *out_pos > block->compressed_size)
+ out_size = *out_pos + block->compressed_size;
+
+ // TODO: In many common cases this could be optimized to use
+ // significantly less memory.
+ lzma_next_coder raw_encoder = LZMA_NEXT_CODER_INIT;
+ lzma_ret ret = lzma_raw_encoder_init(
+ &raw_encoder, allocator, block->filters);
+
+ if (ret == LZMA_OK) {
+ // Encode everything with a single call using LZMA_FINISH.
+ size_t in_pos = 0;
+ ret = raw_encoder.code(raw_encoder.coder, allocator,
+ in, &in_pos, in_size, out, out_pos, out_size,
+ LZMA_FINISH);
+ }
+
+ // NOTE: This needs to be run even if lzma_raw_encoder_init() failed.
+ lzma_next_end(&raw_encoder, allocator);
+
+ if (ret == LZMA_STREAM_END) {
+ // Compression was successful. Write the Block Header.
+ block->compressed_size
+ = *out_pos - (out_start + block->header_size);
+ ret = lzma_block_header_encode(block, out + out_start);
+ // Any failure of the header encoder is reported to
+ // the caller as LZMA_PROG_ERROR.
+ if (ret != LZMA_OK)
+ ret = LZMA_PROG_ERROR;
+
+ } else if (ret == LZMA_OK) {
+ // Output buffer became full.
+ ret = LZMA_BUF_ERROR;
+ }
+
+ // Reset *out_pos if something went wrong.
+ if (ret != LZMA_OK)
+ *out_pos = out_start;
+
+ return ret;
+}
+
+
+// Common implementation of lzma_block_buffer_encode() and
+// lzma_block_uncomp_encode().
+//
+// Encodes in[0 .. in_size) as one Block at out + *out_pos: the encoded
+// data, then Block Padding, then the Check field (if the Check type has
+// a non-zero size). If try_to_compress is true, block_encode_normal() is
+// tried first. If that fails with LZMA_BUF_ERROR, or if try_to_compress
+// is false, block_encode_uncompressed() is used instead.
+//
+// block->uncompressed_size and block->compressed_size are overwritten.
+// block->raw_check is set when the Check field is written.
+//
+// Returns LZMA_OK on success. The error codes returned directly from here
+// are LZMA_PROG_ERROR (invalid arguments), LZMA_OPTIONS_ERROR (unsupported
+// block->version), LZMA_UNSUPPORTED_CHECK, LZMA_BUF_ERROR (no room for
+// the Check field), and LZMA_DATA_ERROR (lzma2_bound() returned zero).
+// Errors from the two encoding helpers are passed through.
+static lzma_ret
+block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
+ const uint8_t *in, size_t in_size,
+ uint8_t *out, size_t *out_pos, size_t out_size,
+ bool try_to_compress)
+{
+ // Validate the arguments.
+ if (block == NULL || (in == NULL && in_size != 0) || out == NULL
+ || out_pos == NULL || *out_pos > out_size)
+ return LZMA_PROG_ERROR;
+
+ // The contents of the structure may depend on the version so
+ // check the version before validating the contents of *block.
+ if (block->version > 1)
+ return LZMA_OPTIONS_ERROR;
+
+ // The filter chain is needed only when compression is attempted.
+ if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX
+ || (try_to_compress && block->filters == NULL))
+ return LZMA_PROG_ERROR;
+
+ if (!lzma_check_is_supported(block->check))
+ return LZMA_UNSUPPORTED_CHECK;
+
+ // Size of a Block has to be a multiple of four, so limit the size
+ // here already. This way we don't need to check it again when adding
+ // Block Padding.
+ out_size -= (out_size - *out_pos) & 3;
+
+ // Get the size of the Check field.
+ const size_t check_size = lzma_check_size(block->check);
+ assert(check_size != UINT32_MAX);
+
+ // Reserve space for the Check field.
+ if (out_size - *out_pos <= check_size)
+ return LZMA_BUF_ERROR;
+
+ out_size -= check_size;
+
+ // Initialize block->uncompressed_size and calculate the worst-case
+ // value for block->compressed_size.
+ block->uncompressed_size = in_size;
+ block->compressed_size = lzma2_bound(in_size);
+ if (block->compressed_size == 0)
+ return LZMA_DATA_ERROR;
+
+ // Do the actual compression. The initial value LZMA_BUF_ERROR makes
+ // the uncompressed fallback below run when try_to_compress is false.
+ lzma_ret ret = LZMA_BUF_ERROR;
+ if (try_to_compress)
+ ret = block_encode_normal(block, allocator,
+ in, in_size, out, out_pos, out_size);
+
+ if (ret != LZMA_OK) {
+ // If the error was something else than output buffer
+ // becoming full, return the error now.
+ if (ret != LZMA_BUF_ERROR)
+ return ret;
+
+ // The data was incompressible (at least with the options
+ // given to us) or the output buffer was too small. Use the
+ // uncompressed chunks of LZMA2 to wrap the data into a valid
+ // Block. If we haven't been given enough output space, even
+ // this may fail.
+ return_if_error(block_encode_uncompressed(block, in, in_size,
+ out, out_pos, out_size));
+ }
+
+ assert(*out_pos <= out_size);
+
+ // Block Padding. No buffer overflow here, because we already adjusted
+ // out_size so that (out_size - out_start) is a multiple of four.
+ // Thus, if the buffer is full, the loop body can never run.
+ for (size_t i = (size_t)(block->compressed_size); i & 3; ++i) {
+ assert(*out_pos < out_size);
+ out[(*out_pos)++] = 0x00;
+ }
+
+ // If there's no Check field, we are done now.
+ if (check_size > 0) {
+ // Calculate the integrity check. We reserved space for
+ // the Check field earlier so we don't need to check for
+ // available output space here.
+ lzma_check_state check;
+ lzma_check_init(&check, block->check);
+ lzma_check_update(&check, block->check, in, in_size);
+ lzma_check_finish(&check, block->check);
+
+ // The Check is stored both in *block and in the output.
+ memcpy(block->raw_check, check.buffer.u8, check_size);
+ memcpy(out + *out_pos, check.buffer.u8, check_size);
+ *out_pos += check_size;
+ }
+
+ return LZMA_OK;
+}
+
+
+// Single-call Block encoder: compression is attempted first, and
+// block_buffer_encode() falls back to uncompressed LZMA2 chunks if
+// the compressed form doesn't fit.
+extern LZMA_API(lzma_ret)
+lzma_block_buffer_encode(lzma_block *block, const lzma_allocator *allocator,
+		const uint8_t *in, size_t in_size,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	// The last argument (try_to_compress) is what makes this differ
+	// from lzma_block_uncomp_encode().
+	const lzma_ret ret = block_buffer_encode(block, allocator,
+			in, in_size, out, out_pos, out_size, true);
+
+	return ret;
+}
+
+
+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// This is for compatibility with binaries linked against liblzma that
+// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
+//
+// The XZ_5.2.2 version of the symbol is declared as an alias of the
+// lzma_block_uncomp_encode_52 function.
+// NOTE(review): LZMA_SYMVER_API is defined outside this file; its exact
+// expansion isn't visible here.
+LZMA_SYMVER_API("lzma_block_uncomp_encode@XZ_5.2.2",
+ lzma_ret, lzma_block_uncomp_encode_522)(lzma_block *block,
+ const uint8_t *in, size_t in_size,
+ uint8_t *out, size_t *out_pos, size_t out_size)
+ lzma_nothrow lzma_attr_warn_unused_result
+ __attribute__((__alias__("lzma_block_uncomp_encode_52")));
+
+// Declaration for the XZ_5.2 version ("@@" in the version string).
+// The function itself is defined below.
+LZMA_SYMVER_API("lzma_block_uncomp_encode@@XZ_5.2",
+ lzma_ret, lzma_block_uncomp_encode_52)(lzma_block *block,
+ const uint8_t *in, size_t in_size,
+ uint8_t *out, size_t *out_pos, size_t out_size)
+ lzma_nothrow lzma_attr_warn_unused_result;
+
+// Rename the function so that the definition below defines
+// lzma_block_uncomp_encode_52, which is the name used by the
+// declarations above.
+#define lzma_block_uncomp_encode lzma_block_uncomp_encode_52
+#endif
+// Encodes the input as a Block without trying to compress it: this calls
+// block_buffer_encode() with try_to_compress set to false.
+extern LZMA_API(lzma_ret)
+lzma_block_uncomp_encode(lzma_block *block,
+ const uint8_t *in, size_t in_size,
+ uint8_t *out, size_t *out_pos, size_t out_size)
+{
+ // It won't allocate any memory from heap so no need
+ // for lzma_allocator.
+ return block_buffer_encode(block, NULL,
+ in, in_size, out, out_pos, out_size, false);
+}
diff --git a/src/liblzma/common/block_buffer_encoder.h b/src/liblzma/common/block_buffer_encoder.h
new file mode 100644
index 0000000..653207f
--- /dev/null
+++ b/src/liblzma/common/block_buffer_encoder.h
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file block_buffer_encoder.h
+/// \brief Single-call .xz Block encoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_BLOCK_BUFFER_ENCODER_H
+#define LZMA_BLOCK_BUFFER_ENCODER_H
+
+#include "common.h"
+
+
+/// uint64_t version of lzma_block_buffer_bound(). It is used by
+/// stream_encoder_mt.c. Probably the original lzma_block_buffer_bound()
+/// should have been 64-bit, but fixing it would break the ABI.
+extern uint64_t lzma_block_buffer_bound64(uint64_t uncompressed_size);
+
+#endif
diff --git a/src/liblzma/common/block_decoder.c b/src/liblzma/common/block_decoder.c
new file mode 100644
index 0000000..be647d4
--- /dev/null
+++ b/src/liblzma/common/block_decoder.c
@@ -0,0 +1,289 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file block_decoder.c
+/// \brief Decodes .xz Blocks
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "block_decoder.h"
+#include "filter_decoder.h"
+#include "check.h"
+
+
+/// State of the Block decoder. It is allocated and initialized in
+/// lzma_block_decoder_init() and used by block_decode().
+typedef struct {
+ /// Current decoding stage. block_decode() goes through these in
+ /// the order SEQ_CODE, SEQ_PADDING, SEQ_CHECK.
+ enum {
+ SEQ_CODE,
+ SEQ_PADDING,
+ SEQ_CHECK,
+ } sequence;
+
+ /// The filters in the chain; initialized with lzma_raw_decoder_init().
+ lzma_next_coder next;
+
+ /// Decoding options; we also write Compressed Size and Uncompressed
+ /// Size back to this structure when the decoding has been finished.
+ lzma_block *block;
+
+ /// Compressed Size calculated while decoding
+ lzma_vli compressed_size;
+
+ /// Uncompressed Size calculated while decoding
+ lzma_vli uncompressed_size;
+
+ /// Maximum allowed Compressed Size; this takes into account the
+ /// size of the Block Header and Check fields when Compressed Size
+ /// is unknown.
+ lzma_vli compressed_limit;
+
+ /// Maximum allowed Uncompressed Size.
+ lzma_vli uncompressed_limit;
+
+ /// Position when reading the Check field
+ size_t check_pos;
+
+ /// Check of the uncompressed data
+ lzma_check_state check;
+
+ /// True if the integrity check won't be calculated and verified.
+ bool ignore_check;
+} lzma_block_coder;
+
+
+/// Tells if a size calculated while decoding is acceptable: it is when
+/// the reference value is LZMA_VLI_UNKNOWN (there is nothing to compare
+/// against) or when the two values are equal.
+static inline bool
+is_size_valid(lzma_vli size, lzma_vli reference)
+{
+	if (reference == LZMA_VLI_UNKNOWN)
+		return true;
+
+	return size == reference;
+}
+
+
+// Decoding function of the Block decoder. It is a state machine driven by
+// coder->sequence and each state falls through to the next one:
+//
+//   - SEQ_CODE: Run the filter chain with input and output limited by
+//     compressed_limit and uncompressed_limit, keep track of the sizes,
+//     and update the integrity check. When the chain returns
+//     LZMA_STREAM_END, the sizes are validated against the values in
+//     coder->block and then stored there.
+//   - SEQ_PADDING: Consume Block Padding until compressed_size is
+//     a multiple of four. Every padding byte has to be 0x00.
+//   - SEQ_CHECK: Copy the Check field to coder->block->raw_check and
+//     compare it against the calculated value.
+//
+// Returns LZMA_STREAM_END when the whole Block has been decoded, LZMA_OK
+// when more input or output space is needed, and LZMA_DATA_ERROR if
+// the Block is found to be corrupt. Other return values from the filter
+// chain are passed through. LZMA_PROG_ERROR is returned if
+// coder->sequence doesn't match any of the cases.
+static lzma_ret
+block_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_block_coder *coder = coder_ptr;
+
+ switch (coder->sequence) {
+ case SEQ_CODE: {
+ const size_t in_start = *in_pos;
+ const size_t out_start = *out_pos;
+
+ // Limit the amount of input and output space that we give
+ // to the raw decoder based on the information we have
+ // (or don't have) from Block Header.
+ const size_t in_stop = *in_pos + (size_t)my_min(
+ in_size - *in_pos,
+ coder->compressed_limit - coder->compressed_size);
+ const size_t out_stop = *out_pos + (size_t)my_min(
+ out_size - *out_pos,
+ coder->uncompressed_limit - coder->uncompressed_size);
+
+ const lzma_ret ret = coder->next.code(coder->next.coder,
+ allocator, in, in_pos, in_stop,
+ out, out_pos, out_stop, action);
+
+ const size_t in_used = *in_pos - in_start;
+ const size_t out_used = *out_pos - out_start;
+
+ // Because we have limited the input and output sizes,
+ // we know that these cannot grow too big or overflow.
+ coder->compressed_size += in_used;
+ coder->uncompressed_size += out_used;
+
+ if (ret == LZMA_OK) {
+ // These can be true only when the respective size
+ // in coder->block equals the amount processed so far.
+ const bool comp_done = coder->compressed_size
+ == coder->block->compressed_size;
+ const bool uncomp_done = coder->uncompressed_size
+ == coder->block->uncompressed_size;
+
+ // If both input and output amounts match the sizes
+ // in Block Header but we still got LZMA_OK instead
+ // of LZMA_STREAM_END, the file is broken.
+ if (comp_done && uncomp_done)
+ return LZMA_DATA_ERROR;
+
+ // If the decoder has consumed all the input that it
+ // needs but it still couldn't fill the output buffer
+ // or return LZMA_STREAM_END, the file is broken.
+ if (comp_done && *out_pos < out_size)
+ return LZMA_DATA_ERROR;
+
+ // If the decoder has produced all the output but
+ // it still didn't return LZMA_STREAM_END or consume
+ // more input (for example, detecting an end of
+ // payload marker may need more input but produce
+ // no output) the file is broken.
+ if (uncomp_done && *in_pos < in_size)
+ return LZMA_DATA_ERROR;
+ }
+
+ // Don't waste time updating the integrity check if it will be
+ // ignored. Also skip it if no new output was produced. This
+ // avoids null pointer + 0 (undefined behavior) when out == 0.
+ if (!coder->ignore_check && out_used > 0)
+ lzma_check_update(&coder->check, coder->block->check,
+ out + out_start, out_used);
+
+ if (ret != LZMA_STREAM_END)
+ return ret;
+
+ // Compressed and Uncompressed Sizes are now at their final
+ // values. Verify that they match the values given to us.
+ if (!is_size_valid(coder->compressed_size,
+ coder->block->compressed_size)
+ || !is_size_valid(coder->uncompressed_size,
+ coder->block->uncompressed_size))
+ return LZMA_DATA_ERROR;
+
+ // Copy the values into coder->block. The caller
+ // may use this information to construct Index.
+ coder->block->compressed_size = coder->compressed_size;
+ coder->block->uncompressed_size = coder->uncompressed_size;
+
+ coder->sequence = SEQ_PADDING;
+ }
+
+ // Fall through
+
+ case SEQ_PADDING:
+ // Compressed Data is padded to a multiple of four bytes.
+ while (coder->compressed_size & 3) {
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ // We use compressed_size here just to get the Padding
+ // right. The actual Compressed Size was stored to
+ // coder->block already, and won't be modified by
+ // us anymore.
+ ++coder->compressed_size;
+
+ if (in[(*in_pos)++] != 0x00)
+ return LZMA_DATA_ERROR;
+ }
+
+ // With LZMA_CHECK_NONE the Block ends right after the padding.
+ if (coder->block->check == LZMA_CHECK_NONE)
+ return LZMA_STREAM_END;
+
+ if (!coder->ignore_check)
+ lzma_check_finish(&coder->check, coder->block->check);
+
+ coder->sequence = SEQ_CHECK;
+
+ // Fall through
+
+ case SEQ_CHECK: {
+ // The Check field may arrive in pieces; coder->check_pos
+ // keeps track of how much of it has been copied so far.
+ const size_t check_size = lzma_check_size(coder->block->check);
+ lzma_bufcpy(in, in_pos, in_size, coder->block->raw_check,
+ &coder->check_pos, check_size);
+ if (coder->check_pos < check_size)
+ return LZMA_OK;
+
+ // Validate the Check only if we support it.
+ // coder->check.buffer may be uninitialized
+ // when the Check ID is not supported.
+ if (!coder->ignore_check
+ && lzma_check_is_supported(coder->block->check)
+ && memcmp(coder->block->raw_check,
+ coder->check.buffer.u8,
+ check_size) != 0)
+ return LZMA_DATA_ERROR;
+
+ return LZMA_STREAM_END;
+ }
+ }
+
+ return LZMA_PROG_ERROR;
+}
+
+
+// Frees the resources of the Block decoder: first the filter chain
+// and then the coder structure itself.
+static void
+block_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+	lzma_block_coder *block_coder = coder_ptr;
+
+	// The filter chain lives inside the structure, so it has to be
+	// ended before the structure is freed.
+	lzma_next_end(&block_coder->next, allocator);
+	lzma_free(block_coder, allocator);
+}
+
+
+// Initializes *next to be a Block decoder that uses the options in *block.
+// The pointer to *block is stored in the coder, and block_decode() both
+// reads from it and writes to it.
+//
+// Returns LZMA_PROG_ERROR if the options in *block are invalid and
+// LZMA_MEM_ERROR if allocating the coder fails. Otherwise the return value
+// is that of lzma_raw_decoder_init().
+// NOTE(review): lzma_next_coder_init() is a macro defined elsewhere; what
+// it does to *next before the code below runs isn't visible here.
+extern lzma_ret
+lzma_block_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ lzma_block *block)
+{
+ lzma_next_coder_init(&lzma_block_decoder_init, next, allocator);
+
+ // Validate the options. lzma_block_unpadded_size() does that for us
+ // except for Uncompressed Size and filters. Filters are validated
+ // by the raw decoder.
+ if (lzma_block_unpadded_size(block) == 0
+ || !lzma_vli_is_valid(block->uncompressed_size))
+ return LZMA_PROG_ERROR;
+
+ // Allocate *next->coder if needed.
+ lzma_block_coder *coder = next->coder;
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_block_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &block_decode;
+ next->end = &block_decoder_end;
+ coder->next = LZMA_NEXT_CODER_INIT;
+ }
+
+ // Basic initializations
+ coder->sequence = SEQ_CODE;
+ coder->block = block;
+ coder->compressed_size = 0;
+ coder->uncompressed_size = 0;
+
+ // If Compressed Size is not known, we calculate the maximum allowed
+ // value so that encoded size of the Block (including Block Padding)
+ // is still a valid VLI and a multiple of four.
+ coder->compressed_limit
+ = block->compressed_size == LZMA_VLI_UNKNOWN
+ ? (LZMA_VLI_MAX & ~LZMA_VLI_C(3))
+ - block->header_size
+ - lzma_check_size(block->check)
+ : block->compressed_size;
+
+ // With Uncompressed Size this is simpler. If Block Header lacks
+ // the size info, then LZMA_VLI_MAX is the maximum possible
+ // Uncompressed Size.
+ coder->uncompressed_limit
+ = block->uncompressed_size == LZMA_VLI_UNKNOWN
+ ? LZMA_VLI_MAX
+ : block->uncompressed_size;
+
+ // Initialize the check. It's caller's problem if the Check ID is not
+ // supported, and the Block decoder cannot verify the Check field.
+ // Caller can test lzma_check_is_supported(block->check).
+ coder->check_pos = 0;
+ lzma_check_init(&coder->check, block->check);
+
+ // block->ignore_check is read only when block->version >= 1.
+ // With version 0 the check is never ignored.
+ coder->ignore_check = block->version >= 1
+ ? block->ignore_check : false;
+
+ // Initialize the filter chain.
+ return lzma_raw_decoder_init(&coder->next, allocator,
+ block->filters);
+}
+
+
+// Public API function: initializes *strm as a Block decoder using
+// lzma_block_decoder_init() and marks LZMA_RUN and LZMA_FINISH as
+// the supported actions.
+// NOTE(review): lzma_next_strm_init() is a macro defined elsewhere;
+// presumably it returns early if the initialization fails.
+extern LZMA_API(lzma_ret)
+lzma_block_decoder(lzma_stream *strm, lzma_block *block)
+{
+ lzma_next_strm_init(lzma_block_decoder_init, strm, block);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/block_decoder.h b/src/liblzma/common/block_decoder.h
new file mode 100644
index 0000000..718c5ce
--- /dev/null
+++ b/src/liblzma/common/block_decoder.h
@@ -0,0 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file block_decoder.h
+/// \brief Decodes .xz Blocks
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_BLOCK_DECODER_H
+#define LZMA_BLOCK_DECODER_H
+
+#include "common.h"
+
+
+/// \brief      Initializes *next as a Block decoder
+///
+/// The options are read from *block. The decoder keeps the pointer and
+/// writes Compressed Size and Uncompressed Size back to *block when
+/// decoding has been finished.
+extern lzma_ret lzma_block_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator, lzma_block *block);
+
+#endif
diff --git a/src/liblzma/common/block_encoder.c b/src/liblzma/common/block_encoder.c
new file mode 100644
index 0000000..4a136ef
--- /dev/null
+++ b/src/liblzma/common/block_encoder.c
@@ -0,0 +1,227 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file block_encoder.c
+/// \brief Encodes .xz Blocks
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "block_encoder.h"
+#include "filter_encoder.h"
+#include "check.h"
+
+
+/// State of the Block encoder. It is allocated and initialized in
+/// lzma_block_encoder_init() and used by block_encode().
+typedef struct {
+ /// The filters in the chain; initialized with lzma_raw_encoder_init().
+ lzma_next_coder next;
+
+ /// Encoding options; we also write Unpadded Size, Compressed Size,
+ /// and Uncompressed Size back to this structure when the encoding
+ /// has been finished.
+ lzma_block *block;
+
+ /// Current encoding stage. block_encode() goes through these in
+ /// the order SEQ_CODE, SEQ_PADDING, SEQ_CHECK.
+ enum {
+ SEQ_CODE,
+ SEQ_PADDING,
+ SEQ_CHECK,
+ } sequence;
+
+ /// Compressed Size calculated while encoding
+ lzma_vli compressed_size;
+
+ /// Uncompressed Size calculated while encoding
+ lzma_vli uncompressed_size;
+
+ /// Position in the Check field
+ size_t pos;
+
+ /// Check of the uncompressed data
+ lzma_check_state check;
+} lzma_block_coder;
+
+
+// Encoding function of the Block encoder. It is a state machine driven by
+// coder->sequence and each state falls through to the next one:
+//
+//   - SEQ_CODE: Run the filter chain, keep track of the Compressed and
+//     Uncompressed Sizes, and update the integrity check with the input
+//     that was consumed. When the chain returns LZMA_STREAM_END for
+//     an action other than LZMA_SYNC_FLUSH, the sizes are stored to
+//     coder->block.
+//   - SEQ_PADDING: Write 0x00 bytes until compressed_size is a multiple
+//     of four.
+//   - SEQ_CHECK: Write the Check field and copy it to
+//     coder->block->raw_check.
+//
+// Returns LZMA_STREAM_END when the whole Block has been encoded, LZMA_OK
+// when more output space is needed for Block Padding or the Check field,
+// and LZMA_DATA_ERROR if a size limit would be exceeded. While in SEQ_CODE,
+// the return value of the filter chain is passed through unless encoding
+// has finished. LZMA_PROG_ERROR is returned if coder->sequence doesn't
+// match any of the cases.
+static lzma_ret
+block_encode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_block_coder *coder = coder_ptr;
+
+ // Check that our amount of input stays in proper limits.
+ if (LZMA_VLI_MAX - coder->uncompressed_size < in_size - *in_pos)
+ return LZMA_DATA_ERROR;
+
+ switch (coder->sequence) {
+ case SEQ_CODE: {
+ const size_t in_start = *in_pos;
+ const size_t out_start = *out_pos;
+
+ const lzma_ret ret = coder->next.code(coder->next.coder,
+ allocator, in, in_pos, in_size,
+ out, out_pos, out_size, action);
+
+ const size_t in_used = *in_pos - in_start;
+ const size_t out_used = *out_pos - out_start;
+
+ // Compressed Size has its own limit, COMPRESSED_SIZE_MAX.
+ if (COMPRESSED_SIZE_MAX - coder->compressed_size < out_used)
+ return LZMA_DATA_ERROR;
+
+ coder->compressed_size += out_used;
+
+ // No need to check for overflow because we have already
+ // checked it at the beginning of this function.
+ coder->uncompressed_size += in_used;
+
+ // Call lzma_check_update() only if input was consumed. This
+ // avoids null pointer + 0 (undefined behavior) when in == 0.
+ if (in_used > 0)
+ lzma_check_update(&coder->check, coder->block->check,
+ in + in_start, in_used);
+
+ // LZMA_STREAM_END after LZMA_SYNC_FLUSH doesn't end the Block,
+ // so it is returned to the caller without changing the state.
+ if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH)
+ return ret;
+
+ assert(*in_pos == in_size);
+ assert(action == LZMA_FINISH);
+
+ // Copy the values into coder->block. The caller
+ // may use this information to construct Index.
+ coder->block->compressed_size = coder->compressed_size;
+ coder->block->uncompressed_size = coder->uncompressed_size;
+
+ coder->sequence = SEQ_PADDING;
+ }
+
+ // Fall through
+
+ case SEQ_PADDING:
+ // Pad Compressed Data to a multiple of four bytes. We can
+ // use coder->compressed_size for this since we don't need
+ // it for anything else anymore.
+ while (coder->compressed_size & 3) {
+ if (*out_pos >= out_size)
+ return LZMA_OK;
+
+ out[*out_pos] = 0x00;
+ ++*out_pos;
+ ++coder->compressed_size;
+ }
+
+ // With LZMA_CHECK_NONE the Block ends right after the padding.
+ if (coder->block->check == LZMA_CHECK_NONE)
+ return LZMA_STREAM_END;
+
+ lzma_check_finish(&coder->check, coder->block->check);
+
+ coder->sequence = SEQ_CHECK;
+
+ // Fall through
+
+ case SEQ_CHECK: {
+ // The Check field may have to be written in pieces; coder->pos
+ // keeps track of how much of it has been copied so far.
+ const size_t check_size = lzma_check_size(coder->block->check);
+ lzma_bufcpy(coder->check.buffer.u8, &coder->pos, check_size,
+ out, out_pos, out_size);
+ if (coder->pos < check_size)
+ return LZMA_OK;
+
+ memcpy(coder->block->raw_check, coder->check.buffer.u8,
+ check_size);
+ return LZMA_STREAM_END;
+ }
+ }
+
+ return LZMA_PROG_ERROR;
+}
+
+
+// Frees the resources of the Block encoder: first the filter chain
+// and then the coder structure itself.
+static void
+block_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+	lzma_block_coder *block_coder = coder_ptr;
+
+	// The filter chain lives inside the structure, so it has to be
+	// ended before the structure is freed.
+	lzma_next_end(&block_coder->next, allocator);
+	lzma_free(block_coder, allocator);
+}
+
+
+// Passes a filter chain update to the filter chain of this Block.
+// This is allowed only while the coder is still in SEQ_CODE; in any
+// other state LZMA_PROG_ERROR is returned. The "filters" argument isn't
+// used; only reversed_filters is forwarded.
+static lzma_ret
+block_encoder_update(void *coder_ptr, const lzma_allocator *allocator,
+		const lzma_filter *filters lzma_attribute((__unused__)),
+		const lzma_filter *reversed_filters)
+{
+	lzma_block_coder *block_coder = coder_ptr;
+
+	if (block_coder->sequence == SEQ_CODE)
+		return lzma_next_filter_update(&block_coder->next,
+				allocator, reversed_filters);
+
+	return LZMA_PROG_ERROR;
+}
+
+
+// Initializes *next to be a Block encoder that uses the options in *block.
+// The pointer to *block is stored in the coder, and block_encode() writes
+// the sizes and the Check to it when encoding finishes.
+//
+// Returns LZMA_PROG_ERROR if block is NULL or the Check ID is out of range,
+// LZMA_OPTIONS_ERROR if block->version isn't supported,
+// LZMA_UNSUPPORTED_CHECK if the Check type isn't supported, and
+// LZMA_MEM_ERROR if allocating the coder fails. Otherwise the return value
+// is that of lzma_raw_encoder_init().
+// NOTE(review): lzma_next_coder_init() is a macro defined elsewhere; what
+// it does to *next before the code below runs isn't visible here.
+extern lzma_ret
+lzma_block_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ lzma_block *block)
+{
+ lzma_next_coder_init(&lzma_block_encoder_init, next, allocator);
+
+ if (block == NULL)
+ return LZMA_PROG_ERROR;
+
+ // The contents of the structure may depend on the version so
+ // check the version first.
+ if (block->version > 1)
+ return LZMA_OPTIONS_ERROR;
+
+ // If the Check ID is not supported, we cannot calculate the check and
+ // thus not create a proper Block.
+ if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX)
+ return LZMA_PROG_ERROR;
+
+ if (!lzma_check_is_supported(block->check))
+ return LZMA_UNSUPPORTED_CHECK;
+
+ // Allocate and initialize *next->coder if needed.
+ lzma_block_coder *coder = next->coder;
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_block_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &block_encode;
+ next->end = &block_encoder_end;
+ next->update = &block_encoder_update;
+ coder->next = LZMA_NEXT_CODER_INIT;
+ }
+
+ // Basic initializations
+ coder->sequence = SEQ_CODE;
+ coder->block = block;
+ coder->compressed_size = 0;
+ coder->uncompressed_size = 0;
+ coder->pos = 0;
+
+ // Initialize the check
+ lzma_check_init(&coder->check, block->check);
+
+ // Initialize the requested filters.
+ return lzma_raw_encoder_init(&coder->next, allocator, block->filters);
+}
+
+
+// Public API function: initializes *strm as a Block encoder using
+// lzma_block_encoder_init() and marks LZMA_RUN, LZMA_SYNC_FLUSH, and
+// LZMA_FINISH as the supported actions.
+// NOTE(review): lzma_next_strm_init() is a macro defined elsewhere;
+// presumably it returns early if the initialization fails.
+extern LZMA_API(lzma_ret)
+lzma_block_encoder(lzma_stream *strm, lzma_block *block)
+{
+ lzma_next_strm_init(lzma_block_encoder_init, strm, block);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/block_encoder.h b/src/liblzma/common/block_encoder.h
new file mode 100644
index 0000000..bd97c18
--- /dev/null
+++ b/src/liblzma/common/block_encoder.h
@@ -0,0 +1,47 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file block_encoder.h
+/// \brief Encodes .xz Blocks
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_BLOCK_ENCODER_H
+#define LZMA_BLOCK_ENCODER_H
+
+#include "common.h"
+
+
+/// \brief Biggest Compressed Size value that the Block encoder supports
+///
+/// The maximum size of a single Block is limited by the maximum size of
+/// a Stream, which in theory is 2^63 - 3 bytes (i.e. LZMA_VLI_MAX - 3).
+/// While the size is really big and no one should hit it in practice, we
+/// take it into account in some places anyway to catch some errors e.g. if
+/// application passes insanely big value to some function.
+///
+/// We could take into account the headers etc. to determine the exact
+/// maximum size of the Compressed Data field, but the complexity would give
+/// us nothing useful. Instead, limit the size of Compressed Data so that
+/// even with biggest possible Block Header and Check fields the total
+/// encoded size of the Block stays as a valid VLI. This doesn't guarantee
+/// that the size of the Stream doesn't grow too big, but that problem is
+/// taken care outside the Block handling code.
+///
+/// ~LZMA_VLI_C(3) is to guarantee that if we need padding at the end of
+/// the Compressed Data field, it will still stay in the proper limit.
+///
+/// This constant is in this file because it is needed in both
+/// block_encoder.c and block_buffer_encoder.c.
+#define COMPRESSED_SIZE_MAX ((LZMA_VLI_MAX - LZMA_BLOCK_HEADER_SIZE_MAX \
+ - LZMA_CHECK_SIZE_MAX) & ~LZMA_VLI_C(3))
+
+
+/// \brief      Initializes *next as a Block encoder
+///
+/// The options are read from *block. The encoder keeps the pointer and
+/// writes Compressed Size, Uncompressed Size, and the Check back to
+/// *block when encoding has been finished.
+extern lzma_ret lzma_block_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator, lzma_block *block);
+
+#endif
diff --git a/src/liblzma/common/block_header_decoder.c b/src/liblzma/common/block_header_decoder.c
new file mode 100644
index 0000000..c4935dc
--- /dev/null
+++ b/src/liblzma/common/block_header_decoder.c
@@ -0,0 +1,115 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file block_header_decoder.c
+/// \brief Decodes Block Header from .xz files
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "check.h"
+
+
+// Decodes the Block Header stored at in[] into *block.
+//
+// The caller must have set block->header_size and block->check, and
+// block->filters must point to an array with room for
+// LZMA_FILTERS_MAX + 1 entries because every entry is initialized here.
+// On success block->compressed_size, block->uncompressed_size, and
+// block->filters[] hold the decoded values. block->version is lowered to
+// 1 if it was bigger, and block->ignore_check is set to false.
+//
+// Returns LZMA_OK on success, LZMA_PROG_ERROR on invalid arguments,
+// LZMA_DATA_ERROR if the header is corrupt, and LZMA_OPTIONS_ERROR if
+// the header uses unsupported flags or has non-zero padding. Errors from
+// lzma_vli_decode() and lzma_filter_flags_decode() are passed through.
+// If an error occurs after filters have been decoded, the filter options
+// are freed with lzma_filters_free() before returning.
+extern LZMA_API(lzma_ret)
+lzma_block_header_decode(lzma_block *block,
+ const lzma_allocator *allocator, const uint8_t *in)
+{
+ // NOTE: We consider the header to be corrupt not only when the
+ // CRC32 doesn't match, but also when variable-length integers
+ // are invalid or over 63 bits, or if the header is too small
+ // to contain the claimed information.
+
+ // Catch unexpected NULL pointers.
+ if (block == NULL || block->filters == NULL || in == NULL)
+ return LZMA_PROG_ERROR;
+
+ // Initialize the filter options array. This way the caller can
+ // safely free() the options even if an error occurs in this function.
+ for (size_t i = 0; i <= LZMA_FILTERS_MAX; ++i) {
+ block->filters[i].id = LZMA_VLI_UNKNOWN;
+ block->filters[i].options = NULL;
+ }
+
+ // Versions 0 and 1 are supported. If a newer version was specified,
+ // we need to downgrade it.
+ if (block->version > 1)
+ block->version = 1;
+
+ // This isn't a Block Header option, but since the decompressor will
+ // read it if version >= 1, it's better to initialize it here than
+ // to expect the caller to do it since in almost all cases this
+ // should be false.
+ block->ignore_check = false;
+
+ // Validate Block Header Size and Check type. The caller must have
+ // already set these, so it is a programming error if this test fails.
+ if (lzma_block_header_size_decode(in[0]) != block->header_size
+ || (unsigned int)(block->check) > LZMA_CHECK_ID_MAX)
+ return LZMA_PROG_ERROR;
+
+ // Exclude the CRC32 field.
+ const size_t in_size = block->header_size - 4;
+
+ // Verify CRC32. If FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION is
+ // defined, a mismatch is ignored and decoding continues.
+ if (lzma_crc32(in, in_size, 0) != read32le(in + in_size)) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ return LZMA_DATA_ERROR;
+#endif
+ }
+
+ // Check for unsupported flags.
+ if (in[1] & 0x3C)
+ return LZMA_OPTIONS_ERROR;
+
+ // Start after the Block Header Size and Block Flags fields.
+ size_t in_pos = 2;
+
+ // Compressed Size
+ if (in[1] & 0x40) {
+ return_if_error(lzma_vli_decode(&block->compressed_size,
+ NULL, in, &in_pos, in_size));
+
+ // Validate Compressed Size. This checks that it isn't zero
+ // and that the total size of the Block is a valid VLI.
+ if (lzma_block_unpadded_size(block) == 0)
+ return LZMA_DATA_ERROR;
+ } else {
+ block->compressed_size = LZMA_VLI_UNKNOWN;
+ }
+
+ // Uncompressed Size
+ if (in[1] & 0x80)
+ return_if_error(lzma_vli_decode(&block->uncompressed_size,
+ NULL, in, &in_pos, in_size));
+ else
+ block->uncompressed_size = LZMA_VLI_UNKNOWN;
+
+ // Filter Flags. The two lowest bits of Block Flags hold the number
+ // of filters minus one.
+ const size_t filter_count = (in[1] & 3U) + 1;
+ for (size_t i = 0; i < filter_count; ++i) {
+ const lzma_ret ret = lzma_filter_flags_decode(
+ &block->filters[i], allocator,
+ in, &in_pos, in_size);
+ if (ret != LZMA_OK) {
+ lzma_filters_free(block->filters, allocator);
+ return ret;
+ }
+ }
+
+ // Padding
+ while (in_pos < in_size) {
+ if (in[in_pos++] != 0x00) {
+ lzma_filters_free(block->filters, allocator);
+
+ // Possibly some new field present so use
+ // LZMA_OPTIONS_ERROR instead of LZMA_DATA_ERROR.
+ return LZMA_OPTIONS_ERROR;
+ }
+ }
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/block_header_encoder.c b/src/liblzma/common/block_header_encoder.c
new file mode 100644
index 0000000..160425d
--- /dev/null
+++ b/src/liblzma/common/block_header_encoder.c
@@ -0,0 +1,132 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file block_header_encoder.c
+/// \brief Encodes Block Header for .xz files
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "check.h"
+
+
+// Calculates the size of the Block Header that the options in *block
+// would need and stores it, rounded up to a multiple of four, in
+// block->header_size.
+//
+// Returns LZMA_OK on success, LZMA_OPTIONS_ERROR if block->version isn't
+// supported, and LZMA_PROG_ERROR if the sizes or the filter chain are
+// invalid. Errors from lzma_filter_flags_size() are passed through.
+extern LZMA_API(lzma_ret)
+lzma_block_header_size(lzma_block *block)
+{
+	if (block->version > 1)
+		return LZMA_OPTIONS_ERROR;
+
+	// Fixed part: Block Header Size (1) + Block Flags (1) + CRC32 (4).
+	uint32_t total = 1 + 1 + 4;
+
+	// Compressed Size is present only if it is known.
+	// Zero isn't a valid value for it.
+	if (block->compressed_size != LZMA_VLI_UNKNOWN) {
+		const uint32_t vli_bytes
+				= lzma_vli_size(block->compressed_size);
+		if (vli_bytes == 0 || block->compressed_size == 0)
+			return LZMA_PROG_ERROR;
+
+		total += vli_bytes;
+	}
+
+	// Uncompressed Size is present only if it is known.
+	if (block->uncompressed_size != LZMA_VLI_UNKNOWN) {
+		const uint32_t vli_bytes
+				= lzma_vli_size(block->uncompressed_size);
+		if (vli_bytes == 0)
+			return LZMA_PROG_ERROR;
+
+		total += vli_bytes;
+	}
+
+	// List of Filter Flags: there has to be at least one filter.
+	if (block->filters == NULL
+			|| block->filters[0].id == LZMA_VLI_UNKNOWN)
+		return LZMA_PROG_ERROR;
+
+	size_t idx = 0;
+	while (block->filters[idx].id != LZMA_VLI_UNKNOWN) {
+		// Don't allow too many filters.
+		if (idx == LZMA_FILTERS_MAX)
+			return LZMA_PROG_ERROR;
+
+		uint32_t flags_bytes;
+		return_if_error(lzma_filter_flags_size(&flags_bytes,
+				block->filters + idx));
+
+		total += flags_bytes;
+		++idx;
+	}
+
+	// Pad to a multiple of four bytes.
+	block->header_size = (total + 3) & ~UINT32_C(3);
+
+	// NOTE: We don't verify that the encoded size of the Block stays
+	// within limits. This is because it is possible that we are called
+	// with exaggerated Compressed Size (e.g. LZMA_VLI_MAX) to reserve
+	// space for Block Header, and later called again with lower,
+	// real values.
+
+	return LZMA_OK;
+}
+
+
+// Encodes the Block Header described by *block into out[].
+//
+// block->header_size has to be set already, and exactly that many bytes
+// are written to out[]: the header fields, zero padding, and finally
+// the CRC32 in the last four bytes.
+//
+// Returns LZMA_OK on success and LZMA_PROG_ERROR if the options in *block
+// are invalid. Errors from lzma_vli_encode() and
+// lzma_filter_flags_encode() are passed through. Note that part of out[]
+// may have been written already when an error is returned.
+extern LZMA_API(lzma_ret)
+lzma_block_header_encode(const lzma_block *block, uint8_t *out)
+{
+ // Validate everything but filters.
+ if (lzma_block_unpadded_size(block) == 0
+ || !lzma_vli_is_valid(block->uncompressed_size))
+ return LZMA_PROG_ERROR;
+
+ // Indicate the size of the buffer _excluding_ the CRC32 field.
+ const size_t out_size = block->header_size - 4;
+
+ // Store the Block Header Size. Because out_size excludes the four
+ // bytes of CRC32, the stored value is block->header_size / 4 - 1.
+ out[0] = out_size / 4;
+
+ // We write Block Flags in pieces.
+ out[1] = 0x00;
+ size_t out_pos = 2;
+
+ // Compressed Size
+ if (block->compressed_size != LZMA_VLI_UNKNOWN) {
+ return_if_error(lzma_vli_encode(block->compressed_size, NULL,
+ out, &out_pos, out_size));
+
+ out[1] |= 0x40;
+ }
+
+ // Uncompressed Size
+ if (block->uncompressed_size != LZMA_VLI_UNKNOWN) {
+ return_if_error(lzma_vli_encode(block->uncompressed_size, NULL,
+ out, &out_pos, out_size));
+
+ out[1] |= 0x80;
+ }
+
+ // Filter Flags
+ if (block->filters == NULL || block->filters[0].id == LZMA_VLI_UNKNOWN)
+ return LZMA_PROG_ERROR;
+
+ size_t filter_count = 0;
+ do {
+ // There can be a maximum of four filters.
+ if (filter_count == LZMA_FILTERS_MAX)
+ return LZMA_PROG_ERROR;
+
+ return_if_error(lzma_filter_flags_encode(
+ block->filters + filter_count,
+ out, &out_pos, out_size));
+
+ } while (block->filters[++filter_count].id != LZMA_VLI_UNKNOWN);
+
+ // The number of filters minus one goes to the two lowest bits
+ // of Block Flags.
+ out[1] |= filter_count - 1;
+
+ // Padding
+ memzero(out + out_pos, out_size - out_pos);
+
+ // CRC32
+ write32le(out + out_size, lzma_crc32(out, out_size, 0));
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/block_util.c b/src/liblzma/common/block_util.c
new file mode 100644
index 0000000..acb3111
--- /dev/null
+++ b/src/liblzma/common/block_util.c
@@ -0,0 +1,90 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file block_util.c
+/// \brief Utility functions to handle lzma_block
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "index.h"
+
+
+// Derive Compressed Size from Unpadded Size and store it in *block.
+//
+// The Block Header size and the Check size are subtracted from
+// unpadded_size. If block->compressed_size was already known, it has to
+// match the derived value.
+//
+// Returns LZMA_OK on success, LZMA_PROG_ERROR if *block doesn't pass the
+// validation done by lzma_block_unpadded_size(), and LZMA_DATA_ERROR if
+// unpadded_size is too small or contradicts block->compressed_size.
+extern LZMA_API(lzma_ret)
+lzma_block_compressed_size(lzma_block *block, lzma_vli unpadded_size)
+{
+	// Everything except Uncompressed Size and the filters gets validated.
+	const lzma_vli validated = lzma_block_unpadded_size(block);
+	if (validated == 0)
+		return LZMA_PROG_ERROR;
+
+	// Bytes of the Block that are not compressed data.
+	const uint32_t overhead = block->header_size
+			+ lzma_check_size(block->check);
+
+	// Compressed Size has to end up greater than zero.
+	if (overhead >= unpadded_size)
+		return LZMA_DATA_ERROR;
+
+	const lzma_vli derived = unpadded_size - overhead;
+
+	// A Compressed Size that was present in Block Header has to agree
+	// with the value that was just calculated.
+	const bool already_known
+			= block->compressed_size != LZMA_VLI_UNKNOWN;
+	if (already_known && block->compressed_size != derived)
+		return LZMA_DATA_ERROR;
+
+	block->compressed_size = derived;
+	return LZMA_OK;
+}
+
+
+// Calculate Unpadded Size of the Block, validating *block on the way.
+//
+// Returns 0 if block is NULL, if a validated member is invalid, or if the
+// result would exceed UNPADDED_SIZE_MAX. Returns LZMA_VLI_UNKNOWN if
+// Compressed Size is unknown. Otherwise returns the sum of Compressed
+// Size, Block Header size, and Check size.
+extern LZMA_API(lzma_vli)
+lzma_block_unpadded_size(const lzma_block *block)
+{
+	// NOTE: Other functions use this one for validation, so all of the
+	// checks below have to run even when Compressed Size is unknown.
+	// Uncompressed Size and the filters are not looked at.
+	if (block == NULL)
+		return 0;
+
+	if (block->version > 1)
+		return 0;
+
+	// Block Header size has to be within limits and a multiple of four.
+	if (block->header_size < LZMA_BLOCK_HEADER_SIZE_MIN
+			|| block->header_size > LZMA_BLOCK_HEADER_SIZE_MAX
+			|| (block->header_size & 3) != 0)
+		return 0;
+
+	// Compressed Size has to be a valid VLI and must not be zero.
+	if (!lzma_vli_is_valid(block->compressed_size)
+			|| block->compressed_size == 0)
+		return 0;
+
+	if ((unsigned int)(block->check) > LZMA_CHECK_ID_MAX)
+		return 0;
+
+	// Without Compressed Size the size of the Block cannot be known.
+	if (block->compressed_size == LZMA_VLI_UNKNOWN)
+		return LZMA_VLI_UNKNOWN;
+
+	// Sum up the parts and make sure the result is not too big.
+	const lzma_vli total = block->compressed_size
+			+ block->header_size
+			+ lzma_check_size(block->check);
+
+	assert(total >= UNPADDED_SIZE_MIN);
+
+	return total > UNPADDED_SIZE_MAX ? 0 : total;
+}
+
+
+// Return the value of lzma_block_unpadded_size(block) rounded with
+// vli_ceil4(). LZMA_VLI_UNKNOWN is passed through without rounding.
+extern LZMA_API(lzma_vli)
+lzma_block_total_size(const lzma_block *block)
+{
+	const lzma_vli unpadded = lzma_block_unpadded_size(block);
+
+	if (unpadded == LZMA_VLI_UNKNOWN)
+		return unpadded;
+
+	return vli_ceil4(unpadded);
+}
diff --git a/src/liblzma/common/common.c b/src/liblzma/common/common.c
new file mode 100644
index 0000000..adb50d7
--- /dev/null
+++ b/src/liblzma/common/common.c
@@ -0,0 +1,481 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file common.c
+/// \brief Common functions needed in many places in liblzma
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+/////////////
+// Version //
+/////////////
+
+// Return the value LZMA_VERSION had when this file was compiled.
+extern LZMA_API(uint32_t)
+lzma_version_number(void)
+{
+	const uint32_t compiled_version = LZMA_VERSION;
+	return compiled_version;
+}
+
+
+// Return the value LZMA_VERSION_STRING had when this file was compiled.
+extern LZMA_API(const char *)
+lzma_version_string(void)
+{
+	const char *const compiled_version = LZMA_VERSION_STRING;
+	return compiled_version;
+}
+
+
+///////////////////////
+// Memory allocation //
+///////////////////////
+
+// Allocate size bytes with allocator->alloc() if a custom allocator with
+// an alloc function was given, and with malloc() otherwise. Returns
+// whatever the used allocation function returned.
+lzma_attr_alloc_size(1)
+extern void *
+lzma_alloc(size_t size, const lzma_allocator *allocator)
+{
+	// Some malloc() variants return NULL if called with size == 0,
+	// so always ask for at least one byte.
+	const size_t request = size == 0 ? 1 : size;
+
+	if (allocator == NULL || allocator->alloc == NULL)
+		return malloc(request);
+
+	return allocator->alloc(allocator->opaque, 1, request);
+}
+
+
+// Like lzma_alloc() but the returned memory is zeroed. Without a custom
+// alloc function calloc() does both steps; with one, the memory is
+// cleared here after a successful allocation.
+lzma_attr_alloc_size(1)
+extern void *
+lzma_alloc_zero(size_t size, const lzma_allocator *allocator)
+{
+	// Some calloc() variants return NULL if called with size == 0,
+	// so always ask for at least one byte.
+	const size_t request = size == 0 ? 1 : size;
+
+	if (allocator == NULL || allocator->alloc == NULL)
+		return calloc(1, request);
+
+	void *mem = allocator->alloc(allocator->opaque, 1, request);
+	if (mem != NULL)
+		memzero(mem, request);
+
+	return mem;
+}
+
+
+// Free ptr with allocator->free() if a custom allocator with a free
+// function was given, and with free() otherwise.
+extern void
+lzma_free(void *ptr, const lzma_allocator *allocator)
+{
+	if (allocator == NULL || allocator->free == NULL) {
+		free(ptr);
+		return;
+	}
+
+	allocator->free(allocator->opaque, ptr);
+}
+
+
+//////////
+// Misc //
+//////////
+
+// Copy as many bytes as fit from in[*in_pos..in_size) to
+// out[*out_pos..out_size), advance both positions by the number of bytes
+// copied, and return that number.
+extern size_t
+lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size)
+{
+	const size_t src_left = in_size - *in_pos;
+	const size_t dst_left = out_size - *out_pos;
+	const size_t count = my_min(src_left, dst_left);
+
+	// With nothing to copy, in or out might be NULL and calling
+	// memcpy() with them would be undefined behavior. The positions
+	// wouldn't change either, so we are done.
+	if (count == 0)
+		return 0;
+
+	memcpy(out + *out_pos, in + *in_pos, count);
+	*in_pos += count;
+	*out_pos += count;
+
+	return count;
+}
+
+
+// Prepare *next for the first filter in filters[] and run its init
+// function. If filters[0].init is NULL there is nothing to initialize
+// and LZMA_OK is returned.
+extern lzma_ret
+lzma_next_filter_init(lzma_next_coder *next, const lzma_allocator *allocator,
+		const lzma_filter_info *filters)
+{
+	// Ends the old coder first if it was set up by another init function.
+	lzma_next_coder_init(filters[0].init, next, allocator);
+	next->id = filters[0].id;
+
+	if (filters[0].init == NULL)
+		return LZMA_OK;
+
+	return filters[0].init(next, allocator, filters);
+}
+
+
+// Forward a filter options update to *next. Returns LZMA_PROG_ERROR if
+// the Filter ID differs from the one next was initialized with, LZMA_OK
+// at the end of the chain, and otherwise the result of next->update().
+extern lzma_ret
+lzma_next_filter_update(lzma_next_coder *next, const lzma_allocator *allocator,
+		const lzma_filter *reversed_filters)
+{
+	const lzma_vli wanted_id = reversed_filters[0].id;
+
+	// The application must not try to change the Filter ID. Both
+	// reversed_filters[0].id and next->id are LZMA_VLI_UNKNOWN at
+	// the end of the filters.
+	if (wanted_id != next->id)
+		return LZMA_PROG_ERROR;
+
+	if (wanted_id == LZMA_VLI_UNKNOWN)
+		return LZMA_OK;
+
+	assert(next->update != NULL);
+	return next->update(next->coder, allocator, NULL, reversed_filters);
+}
+
+
+// Free the coder held by *next and reset *next to LZMA_NEXT_CODER_INIT.
+// Does nothing if next->init shows that no coder has been set up.
+extern void
+lzma_next_end(lzma_next_coder *next, const lzma_allocator *allocator)
+{
+	if (next->init == (uintptr_t)(NULL))
+		return;
+
+	// Coders may leave next->end as NULL to avoid tiny end functions
+	// that would only call lzma_free(coder, allocator). In that case
+	// lzma_free() is called here.
+	if (next->end == NULL)
+		lzma_free(next->coder, allocator);
+	else
+		next->end(next->coder, allocator);
+
+	// Reset the variables so that we don't accidentally think
+	// that it is an already initialized coder.
+	*next = LZMA_NEXT_CODER_INIT;
+}
+
+
+//////////////////////////////////////
+// External to internal API wrapper //
+//////////////////////////////////////
+
+// Allocate strm->internal if it is NULL and reset the per-stream state.
+// Returns LZMA_PROG_ERROR if strm is NULL, LZMA_MEM_ERROR if the
+// allocation fails, and LZMA_OK otherwise.
+extern lzma_ret
+lzma_strm_init(lzma_stream *strm)
+{
+	if (strm == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (strm->internal == NULL) {
+		lzma_internal *fresh = lzma_alloc(sizeof(lzma_internal),
+				strm->allocator);
+		if (fresh == NULL)
+			return LZMA_MEM_ERROR;
+
+		// A new structure has no coder attached yet.
+		fresh->next = LZMA_NEXT_CODER_INIT;
+		strm->internal = fresh;
+	}
+
+	lzma_internal *const state = strm->internal;
+
+	// No actions are supported until something enables them.
+	memzero(state->supported_actions, sizeof(state->supported_actions));
+	state->sequence = ISEQ_RUN;
+	state->allow_buf_error = false;
+
+	strm->total_in = 0;
+	strm->total_out = 0;
+
+	return LZMA_OK;
+}
+
+/*
+ * Run one coding step of strm->internal->next on behalf of the application.
+ *
+ * The arguments are validated first, then the requested action is checked
+ * against the current sequence state. After that next.code() is called and
+ * the lzma_stream pointers, counters, and totals are advanced by the amounts
+ * it consumed and produced. Finally the coder's return value decides the
+ * value returned to the application and the next sequence state.
+ *
+ * Returns LZMA_PROG_ERROR for invalid arguments or a misused action
+ * sequence, LZMA_OPTIONS_ERROR if one of the checked reserved members of
+ * *strm is set, LZMA_STREAM_END right away if the stream has already ended,
+ * and otherwise the (possibly remapped) return value of next.code().
+ *
+ * NOTE(review): strm itself is dereferenced without a NULL check.
+ */
+extern LZMA_API(lzma_ret)
+lzma_code(lzma_stream *strm, lzma_action action)
+{
+ // Sanity checks
+ if ((strm->next_in == NULL && strm->avail_in != 0)
+ || (strm->next_out == NULL && strm->avail_out != 0)
+ || strm->internal == NULL
+ || strm->internal->next.code == NULL
+ || (unsigned int)(action) > LZMA_ACTION_MAX
+ || !strm->internal->supported_actions[action])
+ return LZMA_PROG_ERROR;
+
+ // Check if unsupported members have been set to non-zero or non-NULL,
+ // which would indicate that some new feature is wanted.
+ if (strm->reserved_ptr1 != NULL
+ || strm->reserved_ptr2 != NULL
+ || strm->reserved_ptr3 != NULL
+ || strm->reserved_ptr4 != NULL
+ || strm->reserved_int2 != 0
+ || strm->reserved_int3 != 0
+ || strm->reserved_int4 != 0
+ || strm->reserved_enum1 != LZMA_RESERVED_ENUM
+ || strm->reserved_enum2 != LZMA_RESERVED_ENUM)
+ return LZMA_OPTIONS_ERROR;
+
+ switch (strm->internal->sequence) {
+ case ISEQ_RUN: // Any supported action may start here.
+ switch (action) {
+ case LZMA_RUN:
+ break;
+
+ case LZMA_SYNC_FLUSH:
+ strm->internal->sequence = ISEQ_SYNC_FLUSH;
+ break;
+
+ case LZMA_FULL_FLUSH:
+ strm->internal->sequence = ISEQ_FULL_FLUSH;
+ break;
+
+ case LZMA_FINISH:
+ strm->internal->sequence = ISEQ_FINISH;
+ break;
+
+ case LZMA_FULL_BARRIER:
+ strm->internal->sequence = ISEQ_FULL_BARRIER;
+ break;
+ }
+
+ break;
+
+ case ISEQ_SYNC_FLUSH:
+ // The same action must be used until we return
+ // LZMA_STREAM_END, and the amount of input must not change.
+ if (action != LZMA_SYNC_FLUSH
+ || strm->internal->avail_in != strm->avail_in)
+ return LZMA_PROG_ERROR;
+
+ break;
+
+ case ISEQ_FULL_FLUSH: // Same rule as with ISEQ_SYNC_FLUSH.
+ if (action != LZMA_FULL_FLUSH
+ || strm->internal->avail_in != strm->avail_in)
+ return LZMA_PROG_ERROR;
+
+ break;
+
+ case ISEQ_FINISH: // Same rule as with ISEQ_SYNC_FLUSH.
+ if (action != LZMA_FINISH
+ || strm->internal->avail_in != strm->avail_in)
+ return LZMA_PROG_ERROR;
+
+ break;
+
+ case ISEQ_FULL_BARRIER: // Same rule as with ISEQ_SYNC_FLUSH.
+ if (action != LZMA_FULL_BARRIER
+ || strm->internal->avail_in != strm->avail_in)
+ return LZMA_PROG_ERROR;
+
+ break;
+
+ case ISEQ_END: // The stream has ended; the coder isn't called anymore.
+ return LZMA_STREAM_END;
+
+ case ISEQ_ERROR: // A fatal error occurred earlier.
+ default:
+ return LZMA_PROG_ERROR;
+ }
+
+ size_t in_pos = 0;
+ size_t out_pos = 0;
+ lzma_ret ret = strm->internal->next.code(
+ strm->internal->next.coder, strm->allocator,
+ strm->next_in, &in_pos, strm->avail_in,
+ strm->next_out, &out_pos, strm->avail_out, action);
+
+ // Updating next_in and next_out has to be skipped when they are NULL
+ // to avoid null pointer + 0 (undefined behavior). Do this by checking
+ // in_pos > 0 and out_pos > 0 because this way NULL + non-zero (a bug)
+ // will get caught one way or other.
+ if (in_pos > 0) {
+ strm->next_in += in_pos;
+ strm->avail_in -= in_pos;
+ strm->total_in += in_pos;
+ }
+
+ if (out_pos > 0) {
+ strm->next_out += out_pos;
+ strm->avail_out -= out_pos;
+ strm->total_out += out_pos;
+ }
+
+ strm->internal->avail_in = strm->avail_in; // Remembered for the "input must not change" checks above.
+
+ switch (ret) {
+ case LZMA_OK:
+ // Don't return LZMA_BUF_ERROR when it happens the first time.
+ // This is to avoid returning LZMA_BUF_ERROR when avail_out
+ // was zero but still there was no more data left to be written
+ // to next_out.
+ if (out_pos == 0 && in_pos == 0) {
+ if (strm->internal->allow_buf_error)
+ ret = LZMA_BUF_ERROR;
+ else
+ strm->internal->allow_buf_error = true;
+ } else {
+ strm->internal->allow_buf_error = false;
+ }
+ break;
+
+ case LZMA_TIMED_OUT: // Internal value; the application sees LZMA_OK.
+ strm->internal->allow_buf_error = false;
+ ret = LZMA_OK;
+ break;
+
+ case LZMA_SEEK_NEEDED:
+ strm->internal->allow_buf_error = false;
+
+ // If LZMA_FINISH was used, reset it back to the
+ // LZMA_RUN-based state so that new input can be supplied
+ // by the application.
+ if (strm->internal->sequence == ISEQ_FINISH)
+ strm->internal->sequence = ISEQ_RUN;
+
+ break;
+
+ case LZMA_STREAM_END: // A flush or barrier finished (back to ISEQ_RUN) or the stream ended.
+ if (strm->internal->sequence == ISEQ_SYNC_FLUSH
+ || strm->internal->sequence == ISEQ_FULL_FLUSH
+ || strm->internal->sequence
+ == ISEQ_FULL_BARRIER)
+ strm->internal->sequence = ISEQ_RUN;
+ else
+ strm->internal->sequence = ISEQ_END;
+
+ // Fall through
+
+ case LZMA_NO_CHECK:
+ case LZMA_UNSUPPORTED_CHECK:
+ case LZMA_GET_CHECK:
+ case LZMA_MEMLIMIT_ERROR:
+ // Something else than LZMA_OK, but not a fatal error,
+ // that is, coding may be continued (except if ISEQ_END).
+ strm->internal->allow_buf_error = false;
+ break;
+
+ default:
+ // All the other errors are fatal; coding cannot be continued.
+ assert(ret != LZMA_BUF_ERROR);
+ strm->internal->sequence = ISEQ_ERROR;
+ break;
+ }
+
+ return ret;
+}
+
+
+// Free the coder and the internal state attached to *strm and set
+// strm->internal to NULL. A NULL strm or a strm without internal state
+// is left untouched.
+extern LZMA_API(void)
+lzma_end(lzma_stream *strm)
+{
+	if (strm == NULL || strm->internal == NULL)
+		return;
+
+	lzma_internal *const internal = strm->internal;
+
+	// The coder goes first, then the structure that held it.
+	lzma_next_end(&internal->next, strm->allocator);
+	lzma_free(internal, strm->allocator);
+	strm->internal = NULL;
+}
+
+
+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// This is for compatibility with binaries linked against a liblzma that
+// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7. The XZ_5.2.2 symbol below is declared as an alias of lzma_get_progress_52.
+LZMA_SYMVER_API("lzma_get_progress@XZ_5.2.2",
+ void, lzma_get_progress_522)(lzma_stream *strm,
+ uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow
+ __attribute__((__alias__("lzma_get_progress_52")));
+
+LZMA_SYMVER_API("lzma_get_progress@@XZ_5.2",
+ void, lzma_get_progress_52)(lzma_stream *strm,
+ uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow;
+
+#define lzma_get_progress lzma_get_progress_52 // The definition that follows thus defines lzma_get_progress_52.
+#endif // HAVE_SYMBOL_VERSIONS_LINUX
+// Store the progress of *strm in *progress_in and *progress_out. The
+// coder's get_progress function is asked if it has one; otherwise
+// strm->total_in and strm->total_out are used.
+extern LZMA_API(void)
+lzma_get_progress(lzma_stream *strm,
+		uint64_t *progress_in, uint64_t *progress_out)
+{
+	const lzma_next_coder *const next = &strm->internal->next;
+
+	if (next->get_progress == NULL) {
+		*progress_in = strm->total_in;
+		*progress_out = strm->total_out;
+		return;
+	}
+
+	next->get_progress(next->coder, progress_in, progress_out);
+}
+
+
+// Return the integrity check type reported by the coder of *strm.
+extern LZMA_API(lzma_check)
+lzma_get_check(const lzma_stream *strm)
+{
+	const lzma_next_coder *const next = &strm->internal->next;
+
+	// Without a get_check function the check type cannot be known and
+	// LZMA_CHECK_NONE is returned. It's a bug in the application if
+	// this happens.
+	return next->get_check == NULL
+			? LZMA_CHECK_NONE
+			: next->get_check(next->coder);
+}
+
+
+// Return the memory usage reported by the coder's memconfig function.
+// Returns 0 if strm or its internal state is NULL, if the coder has no
+// memconfig function, or if memconfig fails.
+extern LZMA_API(uint64_t)
+lzma_memusage(const lzma_stream *strm)
+{
+	if (strm == NULL || strm->internal == NULL)
+		return 0;
+
+	const lzma_next_coder *const next = &strm->internal->next;
+	if (next->memconfig == NULL)
+		return 0;
+
+	uint64_t memusage;
+	uint64_t old_memlimit;
+
+	// Zero as the new limit means that the limit is only queried.
+	const lzma_ret ret = next->memconfig(next->coder,
+			&memusage, &old_memlimit, 0);
+
+	return ret == LZMA_OK ? memusage : 0;
+}
+
+
+// Return the memory usage limit reported by the coder's memconfig
+// function. Returns 0 if strm or its internal state is NULL, if the
+// coder has no memconfig function, or if memconfig fails.
+extern LZMA_API(uint64_t)
+lzma_memlimit_get(const lzma_stream *strm)
+{
+	if (strm == NULL || strm->internal == NULL)
+		return 0;
+
+	const lzma_next_coder *const next = &strm->internal->next;
+	if (next->memconfig == NULL)
+		return 0;
+
+	uint64_t old_memlimit;
+	uint64_t memusage;
+
+	// Zero as the new limit means that the limit is only queried.
+	const lzma_ret ret = next->memconfig(next->coder,
+			&memusage, &old_memlimit, 0);
+
+	return ret == LZMA_OK ? old_memlimit : 0;
+}
+
+
+// Ask the coder of *strm to change its memory usage limit. Returns
+// LZMA_PROG_ERROR if strm or its internal state is NULL or if the coder
+// has no memconfig function; otherwise returns what memconfig returns.
+extern LZMA_API(lzma_ret)
+lzma_memlimit_set(lzma_stream *strm, uint64_t new_memlimit)
+{
+	if (strm == NULL || strm->internal == NULL)
+		return LZMA_PROG_ERROR;
+
+	const lzma_next_coder *const next = &strm->internal->next;
+	if (next->memconfig == NULL)
+		return LZMA_PROG_ERROR;
+
+	// Zero is a special value that cannot be used as an actual limit.
+	// If 0 was specified, use 1 instead.
+	const uint64_t limit = new_memlimit == 0 ? 1 : new_memlimit;
+
+	// Dummy variables to simplify memconfig functions
+	uint64_t old_memlimit;
+	uint64_t memusage;
+
+	return next->memconfig(next->coder,
+			&memusage, &old_memlimit, limit);
+}
diff --git a/src/liblzma/common/common.h b/src/liblzma/common/common.h
new file mode 100644
index 0000000..378923e
--- /dev/null
+++ b/src/liblzma/common/common.h
@@ -0,0 +1,413 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file common.h
+/// \brief Definitions common to the whole liblzma library
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_COMMON_H
+#define LZMA_COMMON_H
+
+#include "sysdefs.h"
+#include "mythread.h"
+#include "tuklib_integer.h"
+
+// LZMA_API_EXPORT is used to mark the exported API functions.
+// It's used to define the LZMA_API macro.
+//
+// lzma_attr_visibility_hidden is used for marking *declarations* of extern
+// variables that are internal to liblzma (-fvisibility=hidden alone is
+// enough to hide the *definitions*). Such markings allow slightly more
+// efficient code to access those variables in ELF shared libraries.
+#if defined(_WIN32) || defined(__CYGWIN__)
+# ifdef DLL_EXPORT
+# define LZMA_API_EXPORT __declspec(dllexport)
+# else
+# define LZMA_API_EXPORT
+# endif
+# define lzma_attr_visibility_hidden
+// Don't use ifdef or defined() below.
+#elif HAVE_VISIBILITY
+# define LZMA_API_EXPORT __attribute__((__visibility__("default")))
+# define lzma_attr_visibility_hidden \
+ __attribute__((__visibility__("hidden")))
+#else
+# define LZMA_API_EXPORT
+# define lzma_attr_visibility_hidden
+#endif
+
+#define LZMA_API(type) LZMA_API_EXPORT type LZMA_API_CALL
+
+#include "lzma.h"
+
+// This is for detecting modern GCC and Clang attributes
+// like __symver__ in GCC >= 10.
+#ifdef __has_attribute
+# define lzma_has_attribute(attr) __has_attribute(attr)
+#else
+# define lzma_has_attribute(attr) 0
+#endif
+
+// The extra symbol versioning in the C files may only be used when
+// building a shared library. If HAVE_SYMBOL_VERSIONS_LINUX is defined
+// to 2 then symbol versioning is done only if also PIC is defined.
+// By default Libtool defines PIC when building a shared library and
+// doesn't define it when building a static library but it can be
+// overridden with --with-pic and --without-pic. configure lets us rely
+// on PIC if neither --with-pic nor --without-pic was used.
+#if defined(HAVE_SYMBOL_VERSIONS_LINUX) \
+ && (HAVE_SYMBOL_VERSIONS_LINUX == 2 && !defined(PIC))
+# undef HAVE_SYMBOL_VERSIONS_LINUX
+#endif
+
+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// To keep link-time optimization (LTO, -flto) working with GCC,
+// the __symver__ attribute must be used instead of __asm__(".symver ...").
+// Otherwise the symbol versions may be lost, resulting in broken liblzma
+// that has wrong default versions in the exported symbol list!
+// The attribute was added in GCC 10; LTO with older GCC is not supported.
+//
+// To keep -Wmissing-prototypes happy, use LZMA_SYMVER_API only with function
+// declarations (including those with __alias__ attribute) and LZMA_API with
+// the function definitions. This means a little bit of silly copy-and-paste
+// between declarations and definitions though.
+//
+// As of GCC 12.2, the __symver__ attribute supports only @ and @@ but the
+// very convenient @@@ isn't supported (it's supported by GNU assembler
+// since 2000). When using @@ instead of @@@, the internal name must not be
+// the same as the external name to avoid problems in some situations. This
+// is why "#define foo_52 foo" is needed for the default symbol versions.
+//
+// __has_attribute is supported before GCC 10 and it is supported in Clang 14
+// too (which doesn't support __symver__) so use it to detect if __symver__
+// is available. This should be far more reliable than looking at compiler
+// version macros as nowadays especially __GNUC__ is defined by many compilers.
+# if lzma_has_attribute(__symver__)
+# define LZMA_SYMVER_API(extnamever, type, intname) \
+ extern __attribute__((__symver__(extnamever))) \
+ LZMA_API(type) intname
+# else
+# define LZMA_SYMVER_API(extnamever, type, intname) \
+ __asm__(".symver " #intname "," extnamever); \
+ extern LZMA_API(type) intname
+# endif
+#endif
+
+// MSVC has __forceinline which shouldn't be combined with the inline keyword
+// (results in a warning).
+//
+// GCC 3.1 added always_inline attribute so we don't need to check
+// for __GNUC__ version. Similarly, all relevant Clang versions
+// support it (at least Clang 3.0.0 does already).
+// Other compilers that support __has_attribute (Solaris Studio) might
+// support it too, so do that check as well.
+#if defined(_MSC_VER)
+# define lzma_always_inline __forceinline
+#elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) \
+ || lzma_has_attribute(__always_inline__)
+# define lzma_always_inline inline __attribute__((__always_inline__))
+#else
+# define lzma_always_inline inline
+#endif
+
+// These allow helping the compiler in some often-executed branches, whose
+// result is almost always the same.
+#ifdef __GNUC__
+# define likely(expr) __builtin_expect(expr, true)
+# define unlikely(expr) __builtin_expect(expr, false)
+#else
+# define likely(expr) (expr)
+# define unlikely(expr) (expr)
+#endif
+
+
+/// Size of temporary buffers needed in some filters
+#define LZMA_BUFFER_SIZE 4096
+
+
+/// Maximum number of worker threads within one multithreaded component.
+/// The limit exists solely to make it simpler to prevent integer overflows
+/// when allocating structures etc. This should be big enough for now...
+/// the code won't scale anywhere close to this number anyway.
+#define LZMA_THREADS_MAX 16384
+
+
+/// Starting value for memory usage estimates. Instead of calculating size
+/// of _every_ structure and taking into account malloc() overhead etc., we
+/// add a base size to all memory usage estimates. It's not very accurate
+/// but should be easily good enough.
+#define LZMA_MEMUSAGE_BASE (UINT64_C(1) << 15)
+
+/// Start of internal Filter ID space. These IDs must never be used
+/// in Streams.
+#define LZMA_FILTER_RESERVED_START (LZMA_VLI_C(1) << 62)
+
+
+/// Supported flags that can be passed to lzma_stream_decoder(),
+/// lzma_auto_decoder(), or lzma_stream_decoder_mt().
+#define LZMA_SUPPORTED_FLAGS \
+ ( LZMA_TELL_NO_CHECK \
+ | LZMA_TELL_UNSUPPORTED_CHECK \
+ | LZMA_TELL_ANY_CHECK \
+ | LZMA_IGNORE_CHECK \
+ | LZMA_CONCATENATED \
+ | LZMA_FAIL_FAST )
+
+
+/// Largest valid lzma_action value as unsigned integer.
+#define LZMA_ACTION_MAX ((unsigned int)(LZMA_FULL_BARRIER))
+
+
+/// Special return value (lzma_ret) to indicate that a timeout was reached
+/// and lzma_code() must not return LZMA_BUF_ERROR. This is converted to
+/// LZMA_OK in lzma_code().
+#define LZMA_TIMED_OUT LZMA_RET_INTERNAL1
+
+/// Special return value (lzma_ret) for use in stream_decoder_mt.c to
+/// indicate Index was detected instead of a Block Header.
+#define LZMA_INDEX_DETECTED LZMA_RET_INTERNAL2
+
+
+typedef struct lzma_next_coder_s lzma_next_coder;
+
+typedef struct lzma_filter_info_s lzma_filter_info;
+
+
+/// Type of a function used to initialize a filter encoder or decoder
+typedef lzma_ret (*lzma_init_function)(
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+/// Type of a function to do some kind of coding work (filters, Stream,
+/// Block encoders/decoders etc.). Some special coders don't use both
+/// input and output buffers, but for simplicity they still use this same
+/// function prototype.
+typedef lzma_ret (*lzma_code_function)(
+ void *coder, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size,
+ lzma_action action);
+
+/// Type of a function to free the memory allocated for the coder
+typedef void (*lzma_end_function)(
+ void *coder, const lzma_allocator *allocator);
+
+
+/// Raw coder validates and converts an array of lzma_filter structures to
+/// an array of lzma_filter_info structures. This array is used with
+/// lzma_next_filter_init to initialize the filter chain.
+struct lzma_filter_info_s {
+ /// Filter ID. This can be used to share the same initialization
+ /// function *and* data structures with different Filter IDs
+ /// (LZMA_FILTER_LZMA1EXT does it), and also by the encoder
+ /// with lzma_filters_update() if filter chain is updated
+ /// in the middle of a raw stream or Block (LZMA_SYNC_FLUSH).
+ lzma_vli id;
+
+ /// Pointer to function used to initialize the filter.
+ /// This is NULL to indicate end of array.
+ lzma_init_function init;
+
+ /// Pointer to the filter's options structure
+ void *options;
+};
+
+
+/// Hold data and function pointers of the next filter in the chain.
+struct lzma_next_coder_s {
+ /// Pointer to coder-specific data
+ void *coder;
+
+ /// Filter ID. This is LZMA_VLI_UNKNOWN when this structure doesn't
+ /// point to a filter coder.
+ lzma_vli id;
+
+ /// "Pointer" to init function. This is never called here.
+ /// We need only to detect if we are initializing a coder
+ /// that was allocated earlier. See lzma_next_coder_init and
+ /// lzma_next_strm_init macros in this file.
+ uintptr_t init;
+
+ /// Pointer to function to do the actual coding
+ lzma_code_function code;
+
+ /// Pointer to function to free lzma_next_coder.coder. This can
+ /// be NULL; in that case, lzma_free is called to free
+ /// lzma_next_coder.coder.
+ lzma_end_function end;
+
+ /// Pointer to a function to get progress information. If this is NULL,
+ /// lzma_stream.total_in and .total_out are used instead.
+ void (*get_progress)(void *coder,
+ uint64_t *progress_in, uint64_t *progress_out);
+
+ /// Pointer to function to return the type of the integrity check.
+ /// Most coders won't support this and leave it as NULL.
+ lzma_check (*get_check)(const void *coder);
+
+ /// Pointer to function to get and/or change the memory usage limit.
+ /// If new_memlimit == 0, the limit is not changed, only queried.
+ lzma_ret (*memconfig)(void *coder, uint64_t *memusage,
+ uint64_t *old_memlimit, uint64_t new_memlimit);
+
+ /// Update the filter-specific options or the whole filter chain
+ /// in the encoder. lzma_next_filter_update() passes filters == NULL.
+ lzma_ret (*update)(void *coder, const lzma_allocator *allocator,
+ const lzma_filter *filters,
+ const lzma_filter *reversed_filters);
+
+ /// Set how many bytes of output this coder may produce at maximum.
+ /// On success LZMA_OK must be returned.
+ /// If the filter chain as a whole cannot support this feature,
+ /// this must return LZMA_OPTIONS_ERROR.
+ /// If no input has been given to the coder and the requested limit
+ /// is too small, this must return LZMA_BUF_ERROR. If input has been
+ /// seen, LZMA_OK is allowed too.
+ lzma_ret (*set_out_limit)(void *coder, uint64_t *uncomp_size,
+ uint64_t out_limit);
+};
+
+
+/// Macro to initialize lzma_next_coder structure: a compound literal with id set to LZMA_VLI_UNKNOWN and every other member set to NULL.
+#define LZMA_NEXT_CODER_INIT \
+ (lzma_next_coder){ \
+ .coder = NULL, \
+ .init = (uintptr_t)(NULL), \
+ .id = LZMA_VLI_UNKNOWN, \
+ .code = NULL, \
+ .end = NULL, \
+ .get_progress = NULL, \
+ .get_check = NULL, \
+ .memconfig = NULL, \
+ .update = NULL, \
+ .set_out_limit = NULL, \
+ }
+
+
+/// Internal data for lzma_strm_init, lzma_code, and lzma_end. A pointer to
+/// this is stored in lzma_stream.
+struct lzma_internal_s {
+ /// The actual coder that should do something useful
+ lzma_next_coder next;
+
+ /// Track the state of the coder. This is used to validate arguments
+ /// so that the actual coders can rely on e.g. that LZMA_SYNC_FLUSH
+ /// is used on every call to lzma_code until next.code has returned
+ /// LZMA_STREAM_END.
+ enum {
+ ISEQ_RUN,
+ ISEQ_SYNC_FLUSH,
+ ISEQ_FULL_FLUSH,
+ ISEQ_FINISH,
+ ISEQ_FULL_BARRIER,
+ ISEQ_END,
+ ISEQ_ERROR,
+ } sequence;
+
+ /// A copy of lzma_stream avail_in. This is used to verify that the
+ /// amount of input doesn't change once e.g. LZMA_FINISH has been
+ /// used. lzma_code refreshes it after every call to next.code.
+ size_t avail_in;
+
+ /// Indicates which lzma_action values are allowed by next.code.
+ bool supported_actions[LZMA_ACTION_MAX + 1];
+
+ /// If true, lzma_code will return LZMA_BUF_ERROR if no progress was
+ /// made (no input consumed and no output produced by next.code). The first call without progress only sets this.
+ bool allow_buf_error;
+};
+
+
+/// Allocates memory
+lzma_attr_alloc_size(1)
+extern void *lzma_alloc(size_t size, const lzma_allocator *allocator);
+
+/// Allocates memory and zeroes it (like calloc()). This can be faster
+/// than lzma_alloc() + memzero() while being backward compatible with
+/// custom allocators.
+lzma_attr_alloc_size(1)
+extern void *lzma_alloc_zero(size_t size, const lzma_allocator *allocator);
+
+/// Frees memory
+extern void lzma_free(void *ptr, const lzma_allocator *allocator);
+
+
+/// Allocates strm->internal if it is NULL, and initializes *strm and
+/// strm->internal. This function is only called via lzma_next_strm_init macro.
+extern lzma_ret lzma_strm_init(lzma_stream *strm);
+
+/// Initializes the next filter in the chain, if any. This takes care of
+/// freeing the memory of previously initialized filter if it is different
+/// than the filter being initialized now. This way the actual filter
+/// initialization functions don't need to use lzma_next_coder_init macro.
+extern lzma_ret lzma_next_filter_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+/// Update the next filter in the chain, if any. This checks that
+/// the application is not trying to change the Filter IDs.
+extern lzma_ret lzma_next_filter_update(
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter *reversed_filters);
+
+/// Frees the memory allocated for next->coder either using next->end or,
+/// if next->end is NULL, using lzma_free.
+extern void lzma_next_end(lzma_next_coder *next,
+ const lzma_allocator *allocator);
+
+
+/// Copy as much data as possible from in[] to out[] and update *in_pos
+/// and *out_pos accordingly. Returns the number of bytes copied.
+extern size_t lzma_bufcpy(const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size);
+
+
+/// \brief Return if expression doesn't evaluate to LZMA_OK
+///
+/// There are several situations where we want to return immediately
+/// with the value of expr if it isn't LZMA_OK. This macro shortens
+/// the code a little. expr is evaluated once and stored in a lzma_ret.
+#define return_if_error(expr) \
+do { \
+ const lzma_ret ret_ = (expr); \
+ if (ret_ != LZMA_OK) \
+ return ret_; \
+} while (0)
+
+
+/// If next isn't already initialized for func (next->init differs from
+/// func), free the previous coder with lzma_next_end. Then mark
+/// that next is _possibly_ initialized for the coder using this macro.
+/// "Possibly" means that if e.g. allocation of next->coder fails, the
+/// structure isn't actually initialized for this coder, but leaving next->init to func is still OK.
+#define lzma_next_coder_init(func, next, allocator) \
+do { \
+ if ((uintptr_t)(func) != (next)->init) \
+ lzma_next_end(next, allocator); \
+ (next)->init = (uintptr_t)(func); \
+} while (0)
+
+
+/// Initializes lzma_strm and calls func() to initialize strm->internal->next.
+/// (The function being called will use lzma_next_coder_init()). If
+/// initialization fails, lzma_end(strm) frees what func() left allocated
+/// along with strm->internal, and the enclosing function returns the error code.
+#define lzma_next_strm_init(func, strm, ...) \
+do { \
+ return_if_error(lzma_strm_init(strm)); \
+ const lzma_ret ret_ = func(&(strm)->internal->next, \
+ (strm)->allocator, __VA_ARGS__); \
+ if (ret_ != LZMA_OK) { \
+ lzma_end(strm); \
+ return ret_; \
+ } \
+} while (0)
+
+#endif
diff --git a/src/liblzma/common/easy_buffer_encoder.c b/src/liblzma/common/easy_buffer_encoder.c
new file mode 100644
index 0000000..48eb56f
--- /dev/null
+++ b/src/liblzma/common/easy_buffer_encoder.c
@@ -0,0 +1,27 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file easy_buffer_encoder.c
+/// \brief Easy single-call .xz Stream encoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "easy_preset.h"
+
+
+// Convert preset to a filter chain with lzma_easy_preset() and pass it
+// with the remaining arguments to lzma_stream_buffer_encode(). Returns
+// LZMA_OPTIONS_ERROR if lzma_easy_preset() reports a failure.
+extern LZMA_API(lzma_ret)
+lzma_easy_buffer_encode(uint32_t preset, lzma_check check,
+		const lzma_allocator *allocator, const uint8_t *in,
+		size_t in_size, uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	lzma_options_easy opt_easy;
+
+	return lzma_easy_preset(&opt_easy, preset)
+			? LZMA_OPTIONS_ERROR
+			: lzma_stream_buffer_encode(opt_easy.filters, check,
+				allocator, in, in_size,
+				out, out_pos, out_size);
+}
diff --git a/src/liblzma/common/easy_decoder_memusage.c b/src/liblzma/common/easy_decoder_memusage.c
new file mode 100644
index 0000000..20bcd5b
--- /dev/null
+++ b/src/liblzma/common/easy_decoder_memusage.c
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file easy_decoder_memusage.c
+/// \brief Decoder memory usage calculation to match easy encoder presets
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "easy_preset.h"
+
+
+extern LZMA_API(uint64_t)
+lzma_easy_decoder_memusage(uint32_t preset)
+{
+ lzma_options_easy opt_easy;
+ if (lzma_easy_preset(&opt_easy, preset))
+ return UINT32_MAX;
+
+ return lzma_raw_decoder_memusage(opt_easy.filters);
+}
diff --git a/src/liblzma/common/easy_encoder.c b/src/liblzma/common/easy_encoder.c
new file mode 100644
index 0000000..5cb492d
--- /dev/null
+++ b/src/liblzma/common/easy_encoder.c
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file easy_encoder.c
+/// \brief Easy .xz Stream encoder initialization
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "easy_preset.h"
+
+
+extern LZMA_API(lzma_ret)
+lzma_easy_encoder(lzma_stream *strm, uint32_t preset, lzma_check check)
+{
+ lzma_options_easy opt_easy;
+ if (lzma_easy_preset(&opt_easy, preset))
+ return LZMA_OPTIONS_ERROR;
+
+ return lzma_stream_encoder(strm, opt_easy.filters, check);
+}
diff --git a/src/liblzma/common/easy_encoder_memusage.c b/src/liblzma/common/easy_encoder_memusage.c
new file mode 100644
index 0000000..e910575
--- /dev/null
+++ b/src/liblzma/common/easy_encoder_memusage.c
@@ -0,0 +1,24 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file easy_encoder_memusage.c
+/// \brief Easy .xz Stream encoder memory usage calculation
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "easy_preset.h"
+
+
+extern LZMA_API(uint64_t)
+lzma_easy_encoder_memusage(uint32_t preset)
+{
+ lzma_options_easy opt_easy;
+ if (lzma_easy_preset(&opt_easy, preset))
+ return UINT32_MAX;
+
+ return lzma_raw_encoder_memusage(opt_easy.filters);
+}
diff --git a/src/liblzma/common/easy_preset.c b/src/liblzma/common/easy_preset.c
new file mode 100644
index 0000000..2f98598
--- /dev/null
+++ b/src/liblzma/common/easy_preset.c
@@ -0,0 +1,27 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file easy_preset.c
+/// \brief Preset handling for easy encoder and decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "easy_preset.h"
+
+
+extern bool
+lzma_easy_preset(lzma_options_easy *opt_easy, uint32_t preset)
+{
+ if (lzma_lzma_preset(&opt_easy->opt_lzma, preset))
+ return true;
+
+ opt_easy->filters[0].id = LZMA_FILTER_LZMA2;
+ opt_easy->filters[0].options = &opt_easy->opt_lzma;
+ opt_easy->filters[1].id = LZMA_VLI_UNKNOWN;
+
+ return false;
+}
diff --git a/src/liblzma/common/easy_preset.h b/src/liblzma/common/easy_preset.h
new file mode 100644
index 0000000..382ade8
--- /dev/null
+++ b/src/liblzma/common/easy_preset.h
@@ -0,0 +1,32 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file easy_preset.h
+/// \brief Preset handling for easy encoder and decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+typedef struct {
+ /// We need to keep the filters array available in case
+ /// LZMA_FULL_FLUSH is used.
+ lzma_filter filters[LZMA_FILTERS_MAX + 1];
+
+ /// Options for LZMA2
+ lzma_options_lzma opt_lzma;
+
+ // Options for more filters can be added later, so this struct
+ // is not ready to be put into the public API.
+
+} lzma_options_easy;
+
+
+/// Set *easy to the settings given by the preset. Returns true on error,
+/// false on success.
+extern bool lzma_easy_preset(lzma_options_easy *easy, uint32_t preset);
diff --git a/src/liblzma/common/file_info.c b/src/liblzma/common/file_info.c
new file mode 100644
index 0000000..799bb02
--- /dev/null
+++ b/src/liblzma/common/file_info.c
@@ -0,0 +1,855 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file file_info.c
+/// \brief Decode .xz file information into a lzma_index structure
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "index_decoder.h"
+
+
+typedef struct {
+ enum {
+ SEQ_MAGIC_BYTES,
+ SEQ_PADDING_SEEK,
+ SEQ_PADDING_DECODE,
+ SEQ_FOOTER,
+ SEQ_INDEX_INIT,
+ SEQ_INDEX_DECODE,
+ SEQ_HEADER_DECODE,
+ SEQ_HEADER_COMPARE,
+ } sequence;
+
+ /// Absolute position of in[*in_pos] in the file. All code that
+ /// modifies *in_pos also updates this. seek_to_pos() needs this
+ /// to determine if we need to request the application to seek for
+ /// us or if we can do the seeking internally by adjusting *in_pos.
+ uint64_t file_cur_pos;
+
+ /// This refers to absolute positions of interesting parts of the
+ /// input file. Sometimes it points to the *beginning* of a specific
+ /// field and sometimes to the *end* of a field. The current target
+ /// position at each moment is explained in the comments.
+ uint64_t file_target_pos;
+
+ /// Size of the .xz file (from the application).
+ uint64_t file_size;
+
+ /// Index decoder
+ lzma_next_coder index_decoder;
+
+ /// Number of bytes remaining in the Index field that is currently
+ /// being decoded.
+ lzma_vli index_remaining;
+
+ /// The Index decoder will store the decoded Index in this pointer.
+ lzma_index *this_index;
+
+ /// Amount of Stream Padding in the current Stream.
+ lzma_vli stream_padding;
+
+ /// The final combined index is collected here.
+ lzma_index *combined_index;
+
+ /// Pointer from the application where to store the index information
+ /// after successful decoding.
+ lzma_index **dest_index;
+
+ /// Pointer to lzma_stream.seek_pos to be used when returning
+ /// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed.
+ uint64_t *external_seek_pos;
+
+ /// Memory usage limit
+ uint64_t memlimit;
+
+ /// Stream Flags from the very beginning of the file.
+ lzma_stream_flags first_header_flags;
+
+ /// Stream Flags from Stream Header of the current Stream.
+ lzma_stream_flags header_flags;
+
+ /// Stream Flags from Stream Footer of the current Stream.
+ lzma_stream_flags footer_flags;
+
+ size_t temp_pos;
+ size_t temp_size;
+ uint8_t temp[8192];
+
+} lzma_file_info_coder;
+
+
+/// Copies data from in[*in_pos] into coder->temp until
+/// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos
+/// in sync with *in_pos. Returns true if more input is needed.
+static bool
+fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in,
+ size_t *restrict in_pos, size_t in_size)
+{
+ coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size,
+ coder->temp, &coder->temp_pos, coder->temp_size);
+ return coder->temp_pos < coder->temp_size;
+}
+
+
+/// Seeks to the absolute file position specified by target_pos.
+/// This tries to do the seeking by only modifying *in_pos, if possible.
+/// The main benefit of this is that if one passes the whole file at once
+/// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED
+/// as all the seeking can be done by adjusting *in_pos in this function.
+///
+/// Returns true if an external seek is needed and the caller must return
+/// LZMA_SEEK_NEEDED.
+static bool
+seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos,
+ size_t in_start, size_t *in_pos, size_t in_size)
+{
+ // The input buffer doesn't extend beyond the end of the file.
+ // This has been checked by file_info_decode() already.
+ assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos);
+
+ const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start);
+ const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos);
+
+ bool external_seek_needed;
+
+ if (target_pos >= pos_min && target_pos <= pos_max) {
+ // The requested position is available in the current input
+ // buffer or right after it. That is, in a corner case we
+ // end up setting *in_pos == in_size and thus will immediately
+ // need new input bytes from the application.
+ *in_pos += (size_t)(target_pos - coder->file_cur_pos);
+ external_seek_needed = false;
+ } else {
+ // Ask the application to seek the input file.
+ *coder->external_seek_pos = target_pos;
+ external_seek_needed = true;
+
+ // Mark the whole input buffer as used. This way
+ // lzma_stream.total_in will have a better estimate
+ // of the amount of data read. It still won't be perfect
+ // as the value will depend on the input buffer size that
+ // the application uses, but it should be good enough for
+ // those few who want an estimate.
+ *in_pos = in_size;
+ }
+
+ // After seeking (internal or external) the current position
+ // will match the requested target position.
+ coder->file_cur_pos = target_pos;
+
+ return external_seek_needed;
+}
+
+
+/// The caller sets coder->file_target_pos so that it points to the *end*
+/// of the desired file position. This function then determines how far
+/// backwards from that position we can seek. After seeking fill_temp()
+/// can be used to read data into coder->temp. When fill_temp() has finished,
+/// coder->temp[coder->temp_size] will match coder->file_target_pos.
+///
+/// This also validates that coder->file_target_pos is sane in sense that
+/// we aren't trying to seek too far backwards (too close or beyond the
+/// beginning of the file).
+static lzma_ret
+reverse_seek(lzma_file_info_coder *coder,
+ size_t in_start, size_t *in_pos, size_t in_size)
+{
+ // Check that there is enough data before the target position
+ // to contain at least Stream Header and Stream Footer. If there
+ // isn't, the file cannot be valid.
+ if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE)
+ return LZMA_DATA_ERROR;
+
+ coder->temp_pos = 0;
+
+ // The Stream Header at the very beginning of the file gets handled
+ // specially in SEQ_MAGIC_BYTES and thus we will never need to seek
+ // there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes
+ // we avoid a useless external seek after SEQ_MAGIC_BYTES if the
+ // application uses an extremely small input buffer and the input
+ // file is very small.
+ if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE
+ < sizeof(coder->temp))
+ coder->temp_size = (size_t)(coder->file_target_pos
+ - LZMA_STREAM_HEADER_SIZE);
+ else
+ coder->temp_size = sizeof(coder->temp);
+
+ // The above if-statements guarantee this. This is important because
+ // the Stream Header/Footer decoders assume that there's at least
+ // LZMA_STREAM_HEADER_SIZE bytes in coder->temp.
+ assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE);
+
+ if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size,
+ in_start, in_pos, in_size))
+ return LZMA_SEEK_NEEDED;
+
+ return LZMA_OK;
+}
+
+
+/// Gets the number of zero-bytes at the end of the buffer.
+static size_t
+get_padding_size(const uint8_t *buf, size_t buf_size)
+{
+ size_t padding = 0;
+ while (buf_size > 0 && buf[--buf_size] == 0x00)
+ ++padding;
+
+ return padding;
+}
+
+
+/// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR
+/// is used to tell the application that Magic Bytes didn't match. In other
+/// Stream Header/Footer fields (in the middle/end of the file) it could be
+/// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there
+/// is a valid Stream Header at the beginning of the file. For those cases
+/// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR.
+static lzma_ret
+hide_format_error(lzma_ret ret)
+{
+ if (ret == LZMA_FORMAT_ERROR)
+ ret = LZMA_DATA_ERROR;
+
+ return ret;
+}
+
+
+/// Calls the Index decoder and updates coder->index_remaining.
+/// This is a separate function because the input can be either directly
+/// from the application or from coder->temp.
+static lzma_ret
+decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, bool update_file_cur_pos)
+{
+ const size_t in_start = *in_pos;
+
+ const lzma_ret ret = coder->index_decoder.code(
+ coder->index_decoder.coder,
+ allocator, in, in_pos, in_size,
+ NULL, NULL, 0, LZMA_RUN);
+
+ coder->index_remaining -= *in_pos - in_start;
+
+ if (update_file_cur_pos)
+ coder->file_cur_pos += *in_pos - in_start;
+
+ return ret;
+}
+
+
+static lzma_ret
+file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size,
+ uint8_t *restrict out lzma_attribute((__unused__)),
+ size_t *restrict out_pos lzma_attribute((__unused__)),
+ size_t out_size lzma_attribute((__unused__)),
+ lzma_action action lzma_attribute((__unused__)))
+{
+ lzma_file_info_coder *coder = coder_ptr;
+ const size_t in_start = *in_pos;
+
+ // If the caller provides input past the end of the file, trim
+ // the extra bytes from the buffer so that we won't read too far.
+ assert(coder->file_size >= coder->file_cur_pos);
+ if (coder->file_size - coder->file_cur_pos < in_size - in_start)
+ in_size = in_start
+ + (size_t)(coder->file_size - coder->file_cur_pos);
+
+ while (true)
+ switch (coder->sequence) {
+ case SEQ_MAGIC_BYTES:
+ // Decode the Stream Header at the beginning of the file
+ // first to check if the Magic Bytes match. The flags
+ // are stored in coder->first_header_flags so that we
+ // don't need to seek to it again.
+ //
+ // Check that the file is big enough to contain at least
+ // Stream Header.
+ if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
+ return LZMA_FORMAT_ERROR;
+
+ // Read the Stream Header field into coder->temp.
+ if (fill_temp(coder, in, in_pos, in_size))
+ return LZMA_OK;
+
+ // This is the only Stream Header/Footer decoding where we
+ // want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
+ // match. Elsewhere it will be converted to LZMA_DATA_ERROR.
+ return_if_error(lzma_stream_header_decode(
+ &coder->first_header_flags, coder->temp));
+
+ // Now that we know that the Magic Bytes match, check the
+ // file size. It's better to do this here after checking the
+ // Magic Bytes since this way we can give LZMA_FORMAT_ERROR
+ // instead of LZMA_DATA_ERROR when the Magic Bytes don't
+ // match in a file that is too big or isn't a multiple of
+ // four bytes.
+ if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
+ return LZMA_DATA_ERROR;
+
+ // Start looking for Stream Padding and Stream Footer
+ // at the end of the file.
+ coder->file_target_pos = coder->file_size;
+
+ // Fall through
+
+ case SEQ_PADDING_SEEK:
+ coder->sequence = SEQ_PADDING_DECODE;
+ return_if_error(reverse_seek(
+ coder, in_start, in_pos, in_size));
+
+ // Fall through
+
+ case SEQ_PADDING_DECODE: {
+ // Copy to coder->temp first. This keeps the code simpler if
+ // the application only provides input a few bytes at a time.
+ if (fill_temp(coder, in, in_pos, in_size))
+ return LZMA_OK;
+
+ // Scan the buffer backwards to get the size of the
+ // Stream Padding field (if any).
+ const size_t new_padding = get_padding_size(
+ coder->temp, coder->temp_size);
+ coder->stream_padding += new_padding;
+
+ // Set the target position to the beginning of Stream Padding
+ // that has been observed so far. If all Stream Padding has
+ // been seen, then the target position will be at the end
+ // of the Stream Footer field.
+ coder->file_target_pos -= new_padding;
+
+ if (new_padding == coder->temp_size) {
+ // The whole buffer was padding. Seek backwards in
+ // the file to get more input.
+ coder->sequence = SEQ_PADDING_SEEK;
+ break;
+ }
+
+ // Size of Stream Padding must be a multiple of 4 bytes.
+ if (coder->stream_padding & 3)
+ return LZMA_DATA_ERROR;
+
+ coder->sequence = SEQ_FOOTER;
+
+ // Calculate the amount of non-padding data in coder->temp.
+ coder->temp_size -= new_padding;
+ coder->temp_pos = coder->temp_size;
+
+ // We can avoid an external seek if the whole Stream Footer
+ // is already in coder->temp. In that case SEQ_FOOTER won't
+ // read more input and will find the Stream Footer from
+ // coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
+ //
+ // Otherwise we will need to seek. The seeking is done so
+ // that Stream Footer will be at the end of coder->temp.
+ // This way it's likely that we also get a complete Index
+ // field into coder->temp without needing a separate seek
+ // for that (unless the Index field is big).
+ if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
+ return_if_error(reverse_seek(
+ coder, in_start, in_pos, in_size));
+ }
+
+ // Fall through
+
+ case SEQ_FOOTER:
+ // Copy the Stream Footer field into coder->temp.
+ // If Stream Footer was already available in coder->temp
+ // in SEQ_PADDING_DECODE, then this does nothing.
+ if (fill_temp(coder, in, in_pos, in_size))
+ return LZMA_OK;
+
+ // Make coder->file_target_pos and coder->temp_size point
+ // to the beginning of Stream Footer and thus to the end
+ // of the Index field. coder->temp_pos will be updated
+ // a bit later.
+ coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
+ coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
+
+ // Decode Stream Footer.
+ return_if_error(hide_format_error(lzma_stream_footer_decode(
+ &coder->footer_flags,
+ coder->temp + coder->temp_size)));
+
+ // Check that we won't seek past the beginning of the file.
+ //
+ // LZMA_STREAM_HEADER_SIZE is added because there must be
+ // space for Stream Header too even though we won't seek
+ // there before decoding the Index field.
+ //
+ // There's no risk of integer overflow here because
+ // Backward Size cannot be greater than 2^34.
+ if (coder->file_target_pos < coder->footer_flags.backward_size
+ + LZMA_STREAM_HEADER_SIZE)
+ return LZMA_DATA_ERROR;
+
+ // Set the target position to the beginning of the Index field.
+ coder->file_target_pos -= coder->footer_flags.backward_size;
+ coder->sequence = SEQ_INDEX_INIT;
+
+ // We can avoid an external seek if the whole Index field is
+ // already available in coder->temp.
+ if (coder->temp_size >= coder->footer_flags.backward_size) {
+ // Set coder->temp_pos to point to the beginning
+ // of the Index.
+ coder->temp_pos = coder->temp_size
+ - coder->footer_flags.backward_size;
+ } else {
+ // These are set to zero to indicate that there's no
+ // useful data (Index or anything else) in coder->temp.
+ coder->temp_pos = 0;
+ coder->temp_size = 0;
+
+ // Seek to the beginning of the Index field.
+ if (seek_to_pos(coder, coder->file_target_pos,
+ in_start, in_pos, in_size))
+ return LZMA_SEEK_NEEDED;
+ }
+
+ // Fall through
+
+ case SEQ_INDEX_INIT: {
+ // Calculate the amount of memory already used by the earlier
+ // Indexes so that we know how big memory limit to pass to
+ // the Index decoder.
+ //
+ // NOTE: When there are multiple Streams, the separate
+ // lzma_index structures can use more RAM (as measured by
+ // lzma_index_memused()) than the final combined lzma_index.
+ // Thus memlimit may need to be slightly higher than the final
+ // calculated memory usage will be. This is perhaps a bit
+ // confusing to the application, but I think it shouldn't
+ // cause problems in practice.
+ uint64_t memused = 0;
+ if (coder->combined_index != NULL) {
+ memused = lzma_index_memused(coder->combined_index);
+ assert(memused <= coder->memlimit);
+ if (memused > coder->memlimit) // Extra sanity check
+ return LZMA_PROG_ERROR;
+ }
+
+ // Initialize the Index decoder.
+ return_if_error(lzma_index_decoder_init(
+ &coder->index_decoder, allocator,
+ &coder->this_index,
+ coder->memlimit - memused));
+
+ coder->index_remaining = coder->footer_flags.backward_size;
+ coder->sequence = SEQ_INDEX_DECODE;
+ }
+
+ // Fall through
+
+ case SEQ_INDEX_DECODE: {
+ // Decode (a part of) the Index. If the whole Index is already
+ // in coder->temp, read it from there. Otherwise read from
+ // in[*in_pos] onwards. Note that decode_index() updates
+ // coder->index_remaining and optionally coder->file_cur_pos.
+ lzma_ret ret;
+ if (coder->temp_size != 0) {
+ assert(coder->temp_size - coder->temp_pos
+ == coder->index_remaining);
+ ret = decode_index(coder, allocator, coder->temp,
+ &coder->temp_pos, coder->temp_size,
+ false);
+ } else {
+ // Don't give the decoder more input than the known
+ // remaining size of the Index field.
+ size_t in_stop = in_size;
+ if (in_size - *in_pos > coder->index_remaining)
+ in_stop = *in_pos
+ + (size_t)(coder->index_remaining);
+
+ ret = decode_index(coder, allocator,
+ in, in_pos, in_stop, true);
+ }
+
+ switch (ret) {
+ case LZMA_OK:
+ // If the Index decoder asks for more input when we
+ // have already given it as much input as Backward Size
+ // indicated, the file is invalid.
+ if (coder->index_remaining == 0)
+ return LZMA_DATA_ERROR;
+
+ // We cannot get here if we were reading Index from
+ // coder->temp because when reading from coder->temp
+ // we give the Index decoder exactly
+ // coder->index_remaining bytes of input.
+ assert(coder->temp_size == 0);
+
+ return LZMA_OK;
+
+ case LZMA_STREAM_END:
+ // If the decoding seems to be successful, check also
+ // that the Index decoder consumed as much input as
+ // indicated by the Backward Size field.
+ if (coder->index_remaining != 0)
+ return LZMA_DATA_ERROR;
+
+ break;
+
+ default:
+ return ret;
+ }
+
+ // Calculate how much the Index tells us to seek backwards
+ // (relative to the beginning of the Index): Total size of
+ // all Blocks plus the size of the Stream Header field.
+ // No integer overflow here because lzma_index_total_size()
+ // cannot return a value greater than LZMA_VLI_MAX.
+ const uint64_t seek_amount
+ = lzma_index_total_size(coder->this_index)
+ + LZMA_STREAM_HEADER_SIZE;
+
+ // Check that Index is sane in sense that seek_amount won't
+ // make us seek past the beginning of the file when locating
+ // the Stream Header.
+ //
+ // coder->file_target_pos still points to the beginning of
+ // the Index field.
+ if (coder->file_target_pos < seek_amount)
+ return LZMA_DATA_ERROR;
+
+ // Set the target to the beginning of Stream Header.
+ coder->file_target_pos -= seek_amount;
+
+ if (coder->file_target_pos == 0) {
+ // We would seek to the beginning of the file, but
+ // since we already decoded that Stream Header in
+ // SEQ_MAGIC_BYTES, we can use the cached value from
+ // coder->first_header_flags to avoid the seek.
+ coder->header_flags = coder->first_header_flags;
+ coder->sequence = SEQ_HEADER_COMPARE;
+ break;
+ }
+
+ coder->sequence = SEQ_HEADER_DECODE;
+
+ // Make coder->file_target_pos point to the end of
+ // the Stream Header field.
+ coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;
+
+ // If coder->temp_size is non-zero, it points to the end
+ // of the Index field. Then the beginning of the Index
+ // field is at coder->temp[coder->temp_size
+ // - coder->footer_flags.backward_size].
+ assert(coder->temp_size == 0 || coder->temp_size
+ >= coder->footer_flags.backward_size);
+
+ // If coder->temp contained the whole Index, see if it has
+ // enough data to contain also the Stream Header. If so,
+ // we avoid an external seek.
+ //
+ // NOTE: This can happen only with small .xz files and only
+ // for the non-first Stream as the Stream Flags of the first
+ // Stream are cached and already handled a few lines above.
+ // So this isn't as useful as the other seek-avoidance cases.
+ if (coder->temp_size != 0 && coder->temp_size
+ - coder->footer_flags.backward_size
+ >= seek_amount) {
+ // Make temp_pos and temp_size point to the *end* of
+ // Stream Header so that SEQ_HEADER_DECODE will find
+ // the start of Stream Header from coder->temp[
+ // coder->temp_size - LZMA_STREAM_HEADER_SIZE].
+ coder->temp_pos = coder->temp_size
+ - coder->footer_flags.backward_size
+ - seek_amount
+ + LZMA_STREAM_HEADER_SIZE;
+ coder->temp_size = coder->temp_pos;
+ } else {
+ // Seek so that Stream Header will be at the end of
+ // coder->temp. With typical multi-Stream files we
+ // will usually also get the Stream Footer and Index
+ // of the *previous* Stream in coder->temp and thus
+ // won't need a separate seek for them.
+ return_if_error(reverse_seek(coder,
+ in_start, in_pos, in_size));
+ }
+ }
+
+ // Fall through
+
+ case SEQ_HEADER_DECODE:
+ // Copy the Stream Header field into coder->temp.
+ // If Stream Header was already available in coder->temp
+ // in SEQ_INDEX_DECODE, then this does nothing.
+ if (fill_temp(coder, in, in_pos, in_size))
+ return LZMA_OK;
+
+ // Make all these point to the beginning of Stream Header.
+ coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
+ coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
+ coder->temp_pos = coder->temp_size;
+
+ // Decode the Stream Header.
+ return_if_error(hide_format_error(lzma_stream_header_decode(
+ &coder->header_flags,
+ coder->temp + coder->temp_size)));
+
+ coder->sequence = SEQ_HEADER_COMPARE;
+
+ // Fall through
+
+ case SEQ_HEADER_COMPARE:
+ // Compare Stream Header against Stream Footer. They must
+ // match.
+ return_if_error(lzma_stream_flags_compare(
+ &coder->header_flags, &coder->footer_flags));
+
+ // Store the decoded Stream Flags into the Index. Use the
+ // Footer Flags because it contains Backward Size, although
+ // it shouldn't matter in practice.
+ if (lzma_index_stream_flags(coder->this_index,
+ &coder->footer_flags) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ // Store also the size of the Stream Padding field. It is
+ // needed to calculate the offsets of the Streams correctly.
+ if (lzma_index_stream_padding(coder->this_index,
+ coder->stream_padding) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ // Reset it so that it's ready for the next Stream.
+ coder->stream_padding = 0;
+
+ // Append the earlier decoded Indexes after this_index.
+ if (coder->combined_index != NULL)
+ return_if_error(lzma_index_cat(coder->this_index,
+ coder->combined_index, allocator));
+
+ coder->combined_index = coder->this_index;
+ coder->this_index = NULL;
+
+ // If the whole file was decoded, tell the caller that we
+ // are finished.
+ if (coder->file_target_pos == 0) {
+ // The combined index must indicate the same file
+ // size as was told to us at initialization.
+ assert(lzma_index_file_size(coder->combined_index)
+ == coder->file_size);
+
+ // Make the combined index available to
+ // the application.
+ *coder->dest_index = coder->combined_index;
+ coder->combined_index = NULL;
+
+ // Mark the input buffer as used since we may have
+ // done internal seeking and thus don't know how
+ // many input bytes were actually used. This way
+ // lzma_stream.total_in gets a slightly better
+ // estimate of the amount of input used.
+ *in_pos = in_size;
+ return LZMA_STREAM_END;
+ }
+
+ // We didn't hit the beginning of the file yet, so continue
+ // reading backwards in the file. If we have unprocessed
+ // data in coder->temp, use it before requesting more data
+ // from the application.
+ //
+ // coder->file_target_pos, coder->temp_size, and
+ // coder->temp_pos all point to the beginning of Stream Header
+ // and thus the end of the previous Stream in the file.
+ coder->sequence = coder->temp_size > 0
+ ? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
+ break;
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+}
+
+
+static lzma_ret
+file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
+ uint64_t *old_memlimit, uint64_t new_memlimit)
+{
+ lzma_file_info_coder *coder = coder_ptr;
+
+ // The memory usage calculation comes from three things:
+ //
+ // (1) The Indexes that have already been decoded and processed into
+ // coder->combined_index.
+ //
+ // (2) The latest Index in coder->this_index that has been decoded but
+ // not yet put into coder->combined_index.
+ //
+ // (3) The latest Index that we have started decoding but haven't
+ // finished and thus isn't available in coder->this_index yet.
+ // Memory usage and limit information needs to be communicated
+ // from/to coder->index_decoder.
+ //
+ // Care has to be taken to not do both (2) and (3) when calculating
+ // the memory usage.
+ uint64_t combined_index_memusage = 0;
+ uint64_t this_index_memusage = 0;
+
+ // (1) If we have already successfully decoded one or more Indexes,
+ // get their memory usage.
+ if (coder->combined_index != NULL)
+ combined_index_memusage = lzma_index_memused(
+ coder->combined_index);
+
+ // Choose between (2), (3), or neither.
+ if (coder->this_index != NULL) {
+ // (2) The latest Index is available. Use its memory usage.
+ this_index_memusage = lzma_index_memused(coder->this_index);
+
+ } else if (coder->sequence == SEQ_INDEX_DECODE) {
+ // (3) The Index decoder is active and hasn't yet stored
+ // the new index in coder->this_index. Get the memory usage
+ // information from the Index decoder.
+ //
+ // NOTE: If the Index decoder doesn't yet know how much memory
+ // it will eventually need, it will return a tiny value here.
+ uint64_t dummy;
+ if (coder->index_decoder.memconfig(coder->index_decoder.coder,
+ &this_index_memusage, &dummy, 0)
+ != LZMA_OK) {
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+ }
+
+ // Now we know the total memory usage/requirement. If we had neither
+ // old Indexes nor a new Index, this will be zero which isn't
+ // acceptable as lzma_memusage() has to return non-zero on success
+ // and even with an empty .xz file we will end up with a lzma_index
+ // that takes some memory.
+ *memusage = combined_index_memusage + this_index_memusage;
+ if (*memusage == 0)
+ *memusage = lzma_index_memusage(1, 0);
+
+ *old_memlimit = coder->memlimit;
+
+ // If requested, set a new memory usage limit.
+ if (new_memlimit != 0) {
+ if (new_memlimit < *memusage)
+ return LZMA_MEMLIMIT_ERROR;
+
+ // In the condition (3) we need to tell the Index decoder
+ // its new memory usage limit.
+ if (coder->this_index == NULL
+ && coder->sequence == SEQ_INDEX_DECODE) {
+ const uint64_t idec_new_memlimit = new_memlimit
+ - combined_index_memusage;
+
+ assert(this_index_memusage > 0);
+ assert(idec_new_memlimit > 0);
+
+ uint64_t dummy1;
+ uint64_t dummy2;
+
+ if (coder->index_decoder.memconfig(
+ coder->index_decoder.coder,
+ &dummy1, &dummy2, idec_new_memlimit)
+ != LZMA_OK) {
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+ }
+
+ coder->memlimit = new_memlimit;
+ }
+
+ return LZMA_OK;
+}
+
+
+static void
+file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_file_info_coder *coder = coder_ptr;
+
+ lzma_next_end(&coder->index_decoder, allocator);
+ lzma_index_end(coder->this_index, allocator);
+ lzma_index_end(coder->combined_index, allocator);
+
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_ret
+lzma_file_info_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator, uint64_t *seek_pos,
+ lzma_index **dest_index,
+ uint64_t memlimit, uint64_t file_size)
+{
+ lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);
+
+ if (dest_index == NULL)
+ return LZMA_PROG_ERROR;
+
+ lzma_file_info_coder *coder = next->coder;
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &file_info_decode;
+ next->end = &file_info_decoder_end;
+ next->memconfig = &file_info_decoder_memconfig;
+
+ coder->index_decoder = LZMA_NEXT_CODER_INIT;
+ coder->this_index = NULL;
+ coder->combined_index = NULL;
+ }
+
+ coder->sequence = SEQ_MAGIC_BYTES;
+ coder->file_cur_pos = 0;
+ coder->file_target_pos = 0;
+ coder->file_size = file_size;
+
+ lzma_index_end(coder->this_index, allocator);
+ coder->this_index = NULL;
+
+ lzma_index_end(coder->combined_index, allocator);
+ coder->combined_index = NULL;
+
+ coder->stream_padding = 0;
+
+ coder->dest_index = dest_index;
+ coder->external_seek_pos = seek_pos;
+
+ // If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
+ // won't return 0 (which would indicate an error).
+ coder->memlimit = my_max(1, memlimit);
+
+ // Prepare these for reading the first Stream Header into coder->temp.
+ coder->temp_pos = 0;
+ coder->temp_size = LZMA_STREAM_HEADER_SIZE;
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret)
+lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
+ uint64_t memlimit, uint64_t file_size)
+{
+ lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
+ dest_index, memlimit, file_size);
+
+ // We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
+ // lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
+ // combination in a sane way. Applications still need to be careful
+ // if they use LZMA_FINISH so that they remember to reset it back
+ // to LZMA_RUN after seeking if needed.
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/filter_buffer_decoder.c b/src/liblzma/common/filter_buffer_decoder.c
new file mode 100644
index 0000000..6620986
--- /dev/null
+++ b/src/liblzma/common/filter_buffer_decoder.c
@@ -0,0 +1,88 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_buffer_decoder.c
+/// \brief Single-call raw decoding
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_decoder.h"
+
+
+/// \brief Decode raw filter-chain data from one buffer to another
+///
+/// Runs a raw decoder built from filters once with LZMA_FINISH.
+///
+/// \return - LZMA_OK: The decoder reported LZMA_STREAM_END; *in_pos and
+/// *out_pos have been advanced.
+/// - LZMA_BUF_ERROR: The output buffer was too small.
+/// - LZMA_DATA_ERROR: The input appears to be truncated.
+/// - LZMA_PROG_ERROR: A pointer argument was NULL or a position
+/// was beyond its buffer size.
+/// - Any other code returned by the decoder is passed through.
+///
+/// On every return other than LZMA_OK that happens after the decoder
+/// has been run, *in_pos and *out_pos are restored to the values they
+/// had on entry.
+extern LZMA_API(lzma_ret)
+lzma_raw_buffer_decode(
+ const lzma_filter *filters, const lzma_allocator *allocator,
+ const uint8_t *in, size_t *in_pos, size_t in_size,
+ uint8_t *out, size_t *out_pos, size_t out_size)
+{
+ // Validate what isn't validated later in filter_common.c.
+ if (in == NULL || in_pos == NULL || *in_pos > in_size || out == NULL
+ || out_pos == NULL || *out_pos > out_size)
+ return LZMA_PROG_ERROR;
+
+ // Initialize the decoder.
+ // NOTE(review): return_if_error() is defined outside this file;
+ // presumably it returns the error code when initialization fails.
+ lzma_next_coder next = LZMA_NEXT_CODER_INIT;
+ return_if_error(lzma_raw_decoder_init(&next, allocator, filters));
+
+ // Store the positions so that we can restore them if something
+ // goes wrong.
+ const size_t in_start = *in_pos;
+ const size_t out_start = *out_pos;
+
+ // Do the actual decoding. The decoder's memory is freed at the end
+ // of this function because the decoder may be called a second time
+ // below.
+ lzma_ret ret = next.code(next.coder, allocator, in, in_pos, in_size,
+ out, out_pos, out_size, LZMA_FINISH);
+
+ if (ret == LZMA_STREAM_END) {
+ ret = LZMA_OK;
+ } else {
+ if (ret == LZMA_OK) {
+ // Either the input was truncated or the
+ // output buffer was too small.
+ assert(*in_pos == in_size || *out_pos == out_size);
+
+ if (*in_pos != in_size) {
+ // Since input wasn't consumed completely,
+ // the output buffer became full and is
+ // too small.
+ ret = LZMA_BUF_ERROR;
+
+ } else if (*out_pos != out_size) {
+ // Since output didn't become full, the input
+ // has to be truncated.
+ ret = LZMA_DATA_ERROR;
+
+ } else {
+ // All the input was consumed and output
+ // buffer is full. Now we don't immediately
+ // know the reason for the error. Try
+ // decoding one more byte. If it succeeds,
+ // then the output buffer was too small. If
+ // we cannot get a new output byte, the input
+ // is truncated.
+ uint8_t tmp[1];
+ size_t tmp_pos = 0;
+ (void)next.code(next.coder, allocator,
+ in, in_pos, in_size,
+ tmp, &tmp_pos, 1, LZMA_FINISH);
+
+ if (tmp_pos == 1)
+ ret = LZMA_BUF_ERROR;
+ else
+ ret = LZMA_DATA_ERROR;
+ }
+ }
+
+ // Restore the positions.
+ *in_pos = in_start;
+ *out_pos = out_start;
+ }
+
+ lzma_next_end(&next, allocator);
+
+ return ret;
+}
diff --git a/src/liblzma/common/filter_buffer_encoder.c b/src/liblzma/common/filter_buffer_encoder.c
new file mode 100644
index 0000000..dda18e3
--- /dev/null
+++ b/src/liblzma/common/filter_buffer_encoder.c
@@ -0,0 +1,55 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_buffer_encoder.c
+/// \brief Single-call raw encoding
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_encoder.h"
+
+
+/// \brief Encode a whole input buffer with a raw filter chain
+///
+/// \return - LZMA_OK: The encoder reported LZMA_STREAM_END; *out_pos
+/// has been advanced.
+/// - LZMA_BUF_ERROR: The output buffer was too small.
+/// - LZMA_PROG_ERROR: Invalid buffer arguments.
+/// - Any other code returned by the encoder is passed through.
+///
+/// Whenever the encoder was run and the result is not LZMA_OK, *out_pos
+/// is set back to the value it had on entry.
+extern LZMA_API(lzma_ret)
+lzma_raw_buffer_encode(
+		const lzma_filter *filters, const lzma_allocator *allocator,
+		const uint8_t *in, size_t in_size,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	// Argument checks that filter_common.c does not do later.
+	// A NULL input pointer is fine as long as there is no input.
+	if (in == NULL && in_size != 0)
+		return LZMA_PROG_ERROR;
+
+	if (out == NULL || out_pos == NULL || *out_pos > out_size)
+		return LZMA_PROG_ERROR;
+
+	// Set up the encoder.
+	lzma_next_coder enc = LZMA_NEXT_CODER_INIT;
+	return_if_error(lzma_raw_encoder_init(&enc, allocator, filters));
+
+	// Remember where the output started so that it can be rolled
+	// back on failure.
+	const size_t saved_out_pos = *out_pos;
+
+	// Encode everything in one go, then release the encoder.
+	size_t in_used = 0;
+	const lzma_ret status = enc.code(enc.coder, allocator,
+			in, &in_used, in_size,
+			out, out_pos, out_size, LZMA_FINISH);
+	lzma_next_end(&enc, allocator);
+
+	if (status == LZMA_STREAM_END)
+		return LZMA_OK;
+
+	if (status == LZMA_OK) {
+		// The encoder stopped without finishing, so the output
+		// buffer must have run out of space.
+		assert(*out_pos == out_size);
+		*out_pos = saved_out_pos;
+		return LZMA_BUF_ERROR;
+	}
+
+	// A real error: roll back the output position and report it.
+	*out_pos = saved_out_pos;
+	return status;
+}
diff --git a/src/liblzma/common/filter_common.c b/src/liblzma/common/filter_common.c
new file mode 100644
index 0000000..fa0927c
--- /dev/null
+++ b/src/liblzma/common/filter_common.c
@@ -0,0 +1,385 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_common.c
+/// \brief Filter-specific stuff common for both encoder and decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_common.h"
+
+
+/// Table of the Filter IDs known to this file together with the
+/// properties needed to copy filter options and to validate filter
+/// chains. Entries are compiled in only when the matching encoder or
+/// decoder is enabled. The last entry, whose ID is LZMA_VLI_UNKNOWN,
+/// terminates the table: lzma_filters_copy() and lzma_validate_chain()
+/// search the table linearly and treat reaching that entry as
+/// "Filter ID not found".
+static const struct {
+ /// Filter ID
+ lzma_vli id;
+
+ /// Size of the filter-specific options structure
+ size_t options_size;
+
+ /// True if it is OK to use this filter as non-last filter in
+ /// the chain.
+ bool non_last_ok;
+
+ /// True if it is OK to use this filter as the last filter in
+ /// the chain.
+ bool last_ok;
+
+ /// True if the filter may change the size of the data (that is, the
+ /// amount of encoded output can be different than the amount of
+ /// uncompressed input).
+ bool changes_size;
+
+} features[] = {
+#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
+ {
+ .id = LZMA_FILTER_LZMA1,
+ .options_size = sizeof(lzma_options_lzma),
+ .non_last_ok = false,
+ .last_ok = true,
+ .changes_size = true,
+ },
+ {
+ .id = LZMA_FILTER_LZMA1EXT,
+ .options_size = sizeof(lzma_options_lzma),
+ .non_last_ok = false,
+ .last_ok = true,
+ .changes_size = true,
+ },
+#endif
+#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
+ {
+ .id = LZMA_FILTER_LZMA2,
+ .options_size = sizeof(lzma_options_lzma),
+ .non_last_ok = false,
+ .last_ok = true,
+ .changes_size = true,
+ },
+#endif
+#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
+ {
+ .id = LZMA_FILTER_X86,
+ .options_size = sizeof(lzma_options_bcj),
+ .non_last_ok = true,
+ .last_ok = false,
+ .changes_size = false,
+ },
+#endif
+#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
+ {
+ .id = LZMA_FILTER_POWERPC,
+ .options_size = sizeof(lzma_options_bcj),
+ .non_last_ok = true,
+ .last_ok = false,
+ .changes_size = false,
+ },
+#endif
+#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
+ {
+ .id = LZMA_FILTER_IA64,
+ .options_size = sizeof(lzma_options_bcj),
+ .non_last_ok = true,
+ .last_ok = false,
+ .changes_size = false,
+ },
+#endif
+#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
+ {
+ .id = LZMA_FILTER_ARM,
+ .options_size = sizeof(lzma_options_bcj),
+ .non_last_ok = true,
+ .last_ok = false,
+ .changes_size = false,
+ },
+#endif
+#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
+ {
+ .id = LZMA_FILTER_ARMTHUMB,
+ .options_size = sizeof(lzma_options_bcj),
+ .non_last_ok = true,
+ .last_ok = false,
+ .changes_size = false,
+ },
+#endif
+#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
+ {
+ .id = LZMA_FILTER_ARM64,
+ .options_size = sizeof(lzma_options_bcj),
+ .non_last_ok = true,
+ .last_ok = false,
+ .changes_size = false,
+ },
+#endif
+#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
+ {
+ .id = LZMA_FILTER_SPARC,
+ .options_size = sizeof(lzma_options_bcj),
+ .non_last_ok = true,
+ .last_ok = false,
+ .changes_size = false,
+ },
+#endif
+#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
+ {
+ .id = LZMA_FILTER_DELTA,
+ .options_size = sizeof(lzma_options_delta),
+ .non_last_ok = true,
+ .last_ok = false,
+ .changes_size = false,
+ },
+#endif
+ {
+ .id = LZMA_VLI_UNKNOWN
+ }
+};
+
+
+/// \brief Make a deep copy of a filter chain
+///
+/// Copies the LZMA_VLI_UNKNOWN-terminated array src into real_dest,
+/// allocating a fresh copy of every non-NULL options structure.
+///
+/// \return - LZMA_OK: real_dest holds the copy, including the
+/// terminating element.
+/// - LZMA_PROG_ERROR: src or real_dest is NULL.
+/// - LZMA_OPTIONS_ERROR: More than LZMA_FILTERS_MAX filters,
+/// or options given for a Filter ID that is not in the
+/// features table.
+/// - LZMA_MEM_ERROR: Allocating an options copy failed.
+///
+/// real_dest is written only when LZMA_OK is returned.
+extern LZMA_API(lzma_ret)
+lzma_filters_copy(const lzma_filter *src, lzma_filter *real_dest,
+		const lzma_allocator *allocator)
+{
+	if (src == NULL || real_dest == NULL)
+		return LZMA_PROG_ERROR;
+
+	// Build the copy in a local array first. The caller's array is
+	// touched only after everything has succeeded.
+	lzma_filter dest[LZMA_FILTERS_MAX + 1];
+
+	lzma_ret ret;
+	size_t i;
+	for (i = 0; src[i].id != LZMA_VLI_UNKNOWN; ++i) {
+		// At most LZMA_FILTERS_MAX filters fit in front of
+		// the array terminator.
+		if (i == LZMA_FILTERS_MAX) {
+			ret = LZMA_OPTIONS_ERROR;
+			goto error;
+		}
+
+		dest[i].id = src[i].id;
+		dest[i].options = NULL;
+
+		// Without options there is nothing more to copy, and the
+		// Filter ID isn't checked either. This lets applications
+		// copy partial chains that use placeholder IDs.
+		if (src[i].options == NULL)
+			continue;
+
+		// With options present the Filter ID has to be known,
+		// because otherwise the size of the options is unknown.
+		size_t j = 0;
+		while (features[j].id != src[i].id) {
+			if (features[j].id == LZMA_VLI_UNKNOWN) {
+				ret = LZMA_OPTIONS_ERROR;
+				goto error;
+			}
+
+			++j;
+		}
+
+		// Duplicate the options structure.
+		const size_t opt_size = features[j].options_size;
+		void *const copy = lzma_alloc(opt_size, allocator);
+		if (copy == NULL) {
+			ret = LZMA_MEM_ERROR;
+			goto error;
+		}
+
+		memcpy(copy, src[i].options, opt_size);
+		dest[i].options = copy;
+	}
+
+	// Add the array terminator.
+	assert(i <= LZMA_FILTERS_MAX);
+	dest[i].id = LZMA_VLI_UNKNOWN;
+	dest[i].options = NULL;
+
+	// Everything succeeded, so hand the result to the caller.
+	memcpy(real_dest, dest, (i + 1) * sizeof(lzma_filter));
+
+	return LZMA_OK;
+
+error:
+	// Release the options copied so far, newest first.
+	for (; i > 0; --i)
+		lzma_free(dest[i - 1].options, allocator);
+
+	return ret;
+}
+
+
+/// \brief Free the options of every filter in a chain
+///
+/// Walks the LZMA_VLI_UNKNOWN-terminated array, frees each options
+/// pointer with the given allocator, and resets each visited element
+/// to options = NULL and id = LZMA_VLI_UNKNOWN. A NULL filters pointer
+/// is accepted and ignored.
+extern LZMA_API(void)
+lzma_filters_free(lzma_filter *filters, const lzma_allocator *allocator)
+{
+	if (filters == NULL)
+		return;
+
+	size_t idx = 0;
+	while (filters[idx].id != LZMA_VLI_UNKNOWN) {
+		if (idx == LZMA_FILTERS_MAX) {
+			// By the API, an array has at most
+			// LZMA_FILTERS_MAX + 1 elements with the
+			// terminator included, so this point should be
+			// unreachable. If a bug brings us here anyway,
+			// stop instead of walking past the (probable)
+			// end of the array.
+			assert(0);
+			break;
+		}
+
+		lzma_filter *const f = &filters[idx];
+		lzma_free(f->options, allocator);
+		f->options = NULL;
+		f->id = LZMA_VLI_UNKNOWN;
+
+		++idx;
+	}
+}
+
+
+/// \brief Validate a filter chain and count its filters
+///
+/// \param filters Array of filters terminated by LZMA_VLI_UNKNOWN
+/// \param count Set to the number of filters when LZMA_OK is
+/// returned; left untouched otherwise.
+///
+/// \return - LZMA_OK: The chain is valid.
+/// - LZMA_PROG_ERROR: filters is NULL or the chain is empty.
+/// - LZMA_OPTIONS_ERROR: Unknown Filter ID, a filter in a
+/// position where it isn't allowed, more than
+/// LZMA_FILTERS_MAX filters, or too many filters that
+/// change the size of the data.
+extern lzma_ret
+lzma_validate_chain(const lzma_filter *filters, size_t *count)
+{
+	// There must be at least one filter.
+	if (filters == NULL || filters[0].id == LZMA_VLI_UNKNOWN)
+		return LZMA_PROG_ERROR;
+
+	// Number of filters in the chain that may change the size of
+	// the data significantly (that is, more than 1-2 % or so).
+	size_t changes_size_count = 0;
+
+	// True if it is OK to add a new filter after the current filter.
+	bool non_last_ok = true;
+
+	// True if the last filter in the given chain is actually usable as
+	// the last filter. Only filters that support embedding End of Payload
+	// Marker can be used as the last filter in the chain.
+	bool last_ok = false;
+
+	size_t i = 0;
+	do {
+		// The array may hold at most LZMA_FILTERS_MAX filters
+		// plus the terminator. Finding a filter at index
+		// LZMA_FILTERS_MAX means that the chain is too long.
+		// Stop here so that nothing beyond element
+		// LZMA_FILTERS_MAX of the caller's array is ever read.
+		if (i == LZMA_FILTERS_MAX)
+			return LZMA_OPTIONS_ERROR;
+
+		// Look up the Filter ID in the features table. Reaching
+		// the terminating entry means that the ID is unknown.
+		size_t j;
+		for (j = 0; filters[i].id != features[j].id; ++j)
+			if (features[j].id == LZMA_VLI_UNKNOWN)
+				return LZMA_OPTIONS_ERROR;
+
+		// If the previous filter in the chain cannot be a non-last
+		// filter, the chain is invalid.
+		if (!non_last_ok)
+			return LZMA_OPTIONS_ERROR;
+
+		non_last_ok = features[j].non_last_ok;
+		last_ok = features[j].last_ok;
+		changes_size_count += features[j].changes_size;
+
+	} while (filters[++i].id != LZMA_VLI_UNKNOWN);
+
+	// The loop above guarantees 1-4 filters at this point.
+	assert(i <= LZMA_FILTERS_MAX);
+
+	// The last filter must be usable as the last filter in the chain.
+	// A maximum of three filters are allowed to change the size of
+	// the data.
+	if (!last_ok || changes_size_count > 3)
+		return LZMA_OPTIONS_ERROR;
+
+	*count = i;
+	return LZMA_OK;
+}
+
+
+/// \brief Initialize a raw encoder or decoder for a filter chain
+///
+/// Validates the chain, looks up the init function of every filter with
+/// coder_find(), and initializes next with the resulting chain. For
+/// encoders the filters are stored in reverse order.
+///
+/// \return LZMA_OPTIONS_ERROR if a filter has no entry or no init
+/// function; otherwise the result of chain validation or of
+/// lzma_next_filter_init(). If the latter fails, next is
+/// cleaned up with lzma_next_end().
+extern lzma_ret
+lzma_raw_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+		const lzma_filter *options,
+		lzma_filter_find coder_find, bool is_encoder)
+{
+	// Basic validation; this also tells how many filters there are.
+	size_t count;
+	return_if_error(lzma_validate_chain(options, &count));
+
+	// Collect the init functions and the options pointers.
+	lzma_filter_info filters[LZMA_FILTERS_MAX + 1];
+
+	for (size_t src = 0; src < count; ++src) {
+		const lzma_filter_coder *const fc
+				= coder_find(options[src].id);
+		if (fc == NULL || fc->init == NULL)
+			return LZMA_OPTIONS_ERROR;
+
+		// The encoder uses the filters in reverse order, which
+		// allows more efficient handling of the uncompressed
+		// data. The decoder keeps the order as given.
+		const size_t dst = is_encoder ? count - 1 - src : src;
+
+		filters[dst].id = options[src].id;
+		filters[dst].init = fc->init;
+		filters[dst].options = options[src].options;
+	}
+
+	// Mark the end of the array.
+	filters[count].id = LZMA_VLI_UNKNOWN;
+	filters[count].init = NULL;
+
+	// Initialize the whole chain. On failure, free whatever was
+	// already set up in next.
+	const lzma_ret ret = lzma_next_filter_init(next, allocator, filters);
+	if (ret == LZMA_OK)
+		return LZMA_OK;
+
+	lzma_next_end(next, allocator);
+	return ret;
+}
+
+
+/// \brief Estimate the memory usage of a raw encoder or decoder
+///
+/// \param coder_find Lookup function for the encoder or decoder table
+/// \param filters Filter chain terminated by LZMA_VLI_UNKNOWN
+///
+/// \return Sum of the per-filter memory usage plus LZMA_MEMUSAGE_BASE,
+/// or UINT64_MAX if the chain is invalid, a Filter ID is not
+/// supported, or a filter rejects its options.
+extern uint64_t
+lzma_raw_coder_memusage(lzma_filter_find coder_find,
+		const lzma_filter *filters)
+{
+	// Reject invalid chains. Among other things this guarantees that
+	// the chain contains at least one filter.
+	size_t unused_count;
+	if (lzma_validate_chain(filters, &unused_count) != LZMA_OK)
+		return UINT64_MAX;
+
+	uint64_t total = 0;
+
+	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
+		const lzma_filter_coder *const fc
+				= coder_find(filters[i].id);
+		if (fc == NULL)
+			return UINT64_MAX; // Unsupported Filter ID
+
+		// Filters without a memusage function need only little
+		// memory, so 1 KiB is used as a good estimate. They also
+		// accept all possible options, so there's nothing to
+		// validate.
+		uint64_t usage = 1024;
+
+		if (fc->memusage != NULL) {
+			// Let the filter calculate its own memory usage
+			// and validate its options.
+			usage = fc->memusage(filters[i].options);
+			if (usage == UINT64_MAX)
+				return UINT64_MAX; // Invalid options
+		}
+
+		total += usage;
+	}
+
+	// The fixed extra compensates for the memory usage of Stream,
+	// Block etc. coders, malloc() overhead, stack etc.
+	return total + LZMA_MEMUSAGE_BASE;
+}
diff --git a/src/liblzma/common/filter_common.h b/src/liblzma/common/filter_common.h
new file mode 100644
index 0000000..2e47bb6
--- /dev/null
+++ b/src/liblzma/common/filter_common.h
@@ -0,0 +1,51 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_common.h
+/// \brief Filter-specific stuff common for both encoder and decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_FILTER_COMMON_H
+#define LZMA_FILTER_COMMON_H
+
+#include "common.h"
+
+
+/// Both lzma_filter_encoder and lzma_filter_decoder begin with these members.
+typedef struct {
+ /// Filter ID
+ lzma_vli id;
+
+ /// Initializes the filter encoder or decoder and calls
+ /// lzma_next_filter_init() for filters + 1.
+ lzma_init_function init;
+
+ /// Calculates memory usage of the encoder or decoder. If the
+ /// options are invalid, UINT64_MAX is returned. This may be NULL,
+ /// in which case lzma_raw_coder_memusage() uses 1 KiB as the
+ /// estimate for the filter.
+ uint64_t (*memusage)(const void *options);
+
+} lzma_filter_coder;
+
+
+/// Lookup function type: returns the table entry for the given Filter ID,
+/// or NULL if there is no entry for it.
+typedef const lzma_filter_coder *(*lzma_filter_find)(lzma_vli id);
+
+
+/// \brief Validate a filter chain and count its filters
+///
+/// \param filters Array of filters terminated by LZMA_VLI_UNKNOWN
+/// \param count Set to the number of filters on success
+///
+/// \return LZMA_OK if the chain is valid, LZMA_PROG_ERROR if filters is
+/// NULL or the chain is empty, LZMA_OPTIONS_ERROR otherwise.
+extern lzma_ret lzma_validate_chain(const lzma_filter *filters, size_t *count);
+
+
+/// \brief Initialize a raw encoder or decoder for a filter chain
+///
+/// \param coder_find Lookup function for the encoder or decoder table
+/// \param is_encoder True to store the filters in reverse order, as
+/// the encoder wants them
+extern lzma_ret lzma_raw_coder_init(
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter *filters,
+ lzma_filter_find coder_find, bool is_encoder);
+
+
+/// \brief Estimate the memory usage of a raw encoder or decoder
+///
+/// \return Estimated memory usage in bytes, or UINT64_MAX if the chain
+/// or the options of one of its filters are invalid.
+extern uint64_t lzma_raw_coder_memusage(lzma_filter_find coder_find,
+ const lzma_filter *filters);
+
+
+#endif
diff --git a/src/liblzma/common/filter_decoder.c b/src/liblzma/common/filter_decoder.c
new file mode 100644
index 0000000..fa53f5b
--- /dev/null
+++ b/src/liblzma/common/filter_decoder.c
@@ -0,0 +1,198 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_decoder.c
+/// \brief Filter ID mapping to filter-specific functions
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_decoder.h"
+#include "filter_common.h"
+#include "lzma_decoder.h"
+#include "lzma2_decoder.h"
+#include "simple_decoder.h"
+#include "delta_decoder.h"
+
+
+/// Description of one supported filter decoder. The table of these is
+/// searched by decoder_find().
+typedef struct {
+ /// Filter ID
+ lzma_vli id;
+
+ /// Initializes the filter decoder and calls lzma_next_filter_init()
+ /// for filters + 1.
+ lzma_init_function init;
+
+ /// Calculates memory usage of the decoder. If the options are
+ /// invalid, UINT64_MAX is returned.
+ uint64_t (*memusage)(const void *options);
+
+ /// Decodes Filter Properties.
+ ///
+ /// \return - LZMA_OK: Properties decoded successfully.
+ /// - LZMA_OPTIONS_ERROR: Unsupported properties
+ /// - LZMA_MEM_ERROR: Memory allocation failed.
+ lzma_ret (*props_decode)(
+ void **options, const lzma_allocator *allocator,
+ const uint8_t *props, size_t props_size);
+
+} lzma_filter_decoder;
+
+
+/// Table of the filter decoders that were enabled at build time. Each
+/// entry is compiled in only when the matching HAVE_DECODER_* macro is
+/// defined. The table has no terminating entry; decoder_find() uses
+/// ARRAY_SIZE() to know where it ends. The BCJ filters have no memusage
+/// function (NULL).
+static const lzma_filter_decoder decoders[] = {
+#ifdef HAVE_DECODER_LZMA1
+ {
+ .id = LZMA_FILTER_LZMA1,
+ .init = &lzma_lzma_decoder_init,
+ .memusage = &lzma_lzma_decoder_memusage,
+ .props_decode = &lzma_lzma_props_decode,
+ },
+ {
+ .id = LZMA_FILTER_LZMA1EXT,
+ .init = &lzma_lzma_decoder_init,
+ .memusage = &lzma_lzma_decoder_memusage,
+ .props_decode = &lzma_lzma_props_decode,
+ },
+#endif
+#ifdef HAVE_DECODER_LZMA2
+ {
+ .id = LZMA_FILTER_LZMA2,
+ .init = &lzma_lzma2_decoder_init,
+ .memusage = &lzma_lzma2_decoder_memusage,
+ .props_decode = &lzma_lzma2_props_decode,
+ },
+#endif
+#ifdef HAVE_DECODER_X86
+ {
+ .id = LZMA_FILTER_X86,
+ .init = &lzma_simple_x86_decoder_init,
+ .memusage = NULL,
+ .props_decode = &lzma_simple_props_decode,
+ },
+#endif
+#ifdef HAVE_DECODER_POWERPC
+ {
+ .id = LZMA_FILTER_POWERPC,
+ .init = &lzma_simple_powerpc_decoder_init,
+ .memusage = NULL,
+ .props_decode = &lzma_simple_props_decode,
+ },
+#endif
+#ifdef HAVE_DECODER_IA64
+ {
+ .id = LZMA_FILTER_IA64,
+ .init = &lzma_simple_ia64_decoder_init,
+ .memusage = NULL,
+ .props_decode = &lzma_simple_props_decode,
+ },
+#endif
+#ifdef HAVE_DECODER_ARM
+ {
+ .id = LZMA_FILTER_ARM,
+ .init = &lzma_simple_arm_decoder_init,
+ .memusage = NULL,
+ .props_decode = &lzma_simple_props_decode,
+ },
+#endif
+#ifdef HAVE_DECODER_ARMTHUMB
+ {
+ .id = LZMA_FILTER_ARMTHUMB,
+ .init = &lzma_simple_armthumb_decoder_init,
+ .memusage = NULL,
+ .props_decode = &lzma_simple_props_decode,
+ },
+#endif
+#ifdef HAVE_DECODER_ARM64
+ {
+ .id = LZMA_FILTER_ARM64,
+ .init = &lzma_simple_arm64_decoder_init,
+ .memusage = NULL,
+ .props_decode = &lzma_simple_props_decode,
+ },
+#endif
+#ifdef HAVE_DECODER_SPARC
+ {
+ .id = LZMA_FILTER_SPARC,
+ .init = &lzma_simple_sparc_decoder_init,
+ .memusage = NULL,
+ .props_decode = &lzma_simple_props_decode,
+ },
+#endif
+#ifdef HAVE_DECODER_DELTA
+ {
+ .id = LZMA_FILTER_DELTA,
+ .init = &lzma_delta_decoder_init,
+ .memusage = &lzma_delta_coder_memusage,
+ .props_decode = &lzma_delta_props_decode,
+ },
+#endif
+};
+
+
+/// Returns the decoders[] entry whose Filter ID equals id, or NULL if
+/// the table has no such entry.
+static const lzma_filter_decoder *
+decoder_find(lzma_vli id)
+{
+	size_t i = 0;
+
+	while (i < ARRAY_SIZE(decoders)) {
+		if (decoders[i].id == id)
+			return &decoders[i];
+
+		++i;
+	}
+
+	return NULL;
+}
+
+
+/// \brief Tell whether a decoder exists for the given Filter ID
+///
+/// \return True if decoder_find() has an entry for id, false otherwise.
+extern LZMA_API(lzma_bool)
+lzma_filter_decoder_is_supported(lzma_vli id)
+{
+ return decoder_find(id) != NULL;
+}
+
+
+/// \brief Initialize next as a raw decoder for the given filter chain
+///
+/// Forwards to lzma_raw_coder_init() with decoder_find() as the lookup
+/// function and with is_encoder set to false.
+extern lzma_ret
+lzma_raw_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter *options)
+{
+ // decoder_find() returns a lzma_filter_decoder pointer, so it is
+ // cast to the generic lookup type. NOTE(review): this relies on
+ // lzma_filter_decoder beginning with the members of
+ // lzma_filter_coder -- keep the two structures in sync.
+ return lzma_raw_coder_init(next, allocator,
+ options, (lzma_filter_find)(&decoder_find), false);
+}
+
+
+/// \brief Set up strm as a raw decoder for the given filter chain
+///
+/// Initializes strm through lzma_raw_decoder_init() and enables the
+/// LZMA_RUN and LZMA_FINISH actions.
+///
+/// \return LZMA_OK once the actions have been enabled.
+/// NOTE(review): lzma_next_strm_init() is a macro defined outside
+/// this file; presumably it returns early with an error code if
+/// the initialization fails -- confirm in common.h.
+extern LZMA_API(lzma_ret)
+lzma_raw_decoder(lzma_stream *strm, const lzma_filter *options)
+{
+ lzma_next_strm_init(lzma_raw_decoder_init, strm, options);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
+
+
+/// \brief Estimate the memory usage of a raw decoder
+///
+/// \return The value of lzma_raw_coder_memusage() computed with the
+/// decoder table as the lookup source.
+extern LZMA_API(uint64_t)
+lzma_raw_decoder_memusage(const lzma_filter *filters)
+{
+ return lzma_raw_coder_memusage(
+ (lzma_filter_find)(&decoder_find), filters);
+}
+
+
+/// \brief Decode the Filter Properties of filter->id into filter->options
+///
+/// \return - LZMA_OPTIONS_ERROR: No decoder for filter->id, or the
+/// filter takes no properties but props_size is not zero.
+/// - LZMA_OK: The filter takes no properties and none were given.
+/// - Otherwise whatever the filter's props_decode() returns.
+extern LZMA_API(lzma_ret)
+lzma_properties_decode(lzma_filter *filter, const lzma_allocator *allocator,
+		const uint8_t *props, size_t props_size)
+{
+	// Clear the pointer first so that the caller can free() it
+	// safely no matter how this function returns.
+	filter->options = NULL;
+
+	const lzma_filter_decoder *const entry = decoder_find(filter->id);
+	if (entry == NULL)
+		return LZMA_OPTIONS_ERROR;
+
+	if (entry->props_decode != NULL)
+		return entry->props_decode(&filter->options, allocator,
+				props, props_size);
+
+	// No props_decode() function: only an empty Filter Properties
+	// field is acceptable.
+	if (props_size != 0)
+		return LZMA_OPTIONS_ERROR;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/filter_decoder.h b/src/liblzma/common/filter_decoder.h
new file mode 100644
index 0000000..2dac602
--- /dev/null
+++ b/src/liblzma/common/filter_decoder.h
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_decoder.h
+/// \brief Filter ID mapping to filter-specific functions
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_FILTER_DECODER_H
+#define LZMA_FILTER_DECODER_H
+
+#include "common.h"
+
+
+/// \brief Initialize next as a raw decoder for the given filter chain
+///
+/// \param options Filter chain terminated by LZMA_VLI_UNKNOWN
+extern lzma_ret lzma_raw_decoder_init(
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter *options);
+
+#endif
diff --git a/src/liblzma/common/filter_encoder.c b/src/liblzma/common/filter_encoder.c
new file mode 100644
index 0000000..46fe8af
--- /dev/null
+++ b/src/liblzma/common/filter_encoder.c
@@ -0,0 +1,308 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_encoder.c
+/// \brief Filter ID mapping to filter-specific functions
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_encoder.h"
+#include "filter_common.h"
+#include "lzma_encoder.h"
+#include "lzma2_encoder.h"
+#include "simple_encoder.h"
+#include "delta_encoder.h"
+
+
+/// Description of one supported filter encoder. The table of these is
+/// searched by encoder_find().
+typedef struct {
+ /// Filter ID
+ lzma_vli id;
+
+ /// Initializes the filter encoder and calls lzma_next_filter_init()
+ /// for filters + 1.
+ lzma_init_function init;
+
+ /// Calculates memory usage of the encoder. If the options are
+ /// invalid, UINT64_MAX is returned.
+ uint64_t (*memusage)(const void *options);
+
+ /// Calculates the recommended Uncompressed Size for .xz Blocks to
+ /// which the input data can be split to make multithreaded
+ /// encoding possible. If this is NULL, it is assumed that
+ /// the encoder is fast enough with single thread.
+ uint64_t (*block_size)(const void *options);
+
+ /// Tells the size of the Filter Properties field. If options are
+ /// invalid, LZMA_OPTIONS_ERROR is returned and size is set to
+ /// UINT32_MAX.
+ lzma_ret (*props_size_get)(uint32_t *size, const void *options);
+
+ /// Some filters will always have the same size Filter Properties
+ /// field. If props_size_get is NULL, this value is used.
+ uint32_t props_size_fixed;
+
+ /// Encodes Filter Properties. If this is NULL,
+ /// lzma_properties_encode() writes nothing and returns LZMA_OK.
+ ///
+ /// \return - LZMA_OK: Properties encoded successfully.
+ /// - LZMA_OPTIONS_ERROR: Unsupported options
+ /// - LZMA_PROG_ERROR: Invalid options or not enough
+ /// output space
+ lzma_ret (*props_encode)(const void *options, uint8_t *out);
+
+} lzma_filter_encoder;
+
+
+/// Table of the filter encoders that were enabled at build time. Each
+/// entry is compiled in only when the matching HAVE_ENCODER_* macro is
+/// defined. The table has no terminating entry; encoder_find() uses
+/// ARRAY_SIZE() to know where it ends.
+///
+/// The BCJ entries don't set props_size_fixed. The member is not used
+/// for them because lzma_properties_size() reads it only when
+/// props_size_get is NULL.
+static const lzma_filter_encoder encoders[] = {
+#ifdef HAVE_ENCODER_LZMA1
+ {
+ .id = LZMA_FILTER_LZMA1,
+ .init = &lzma_lzma_encoder_init,
+ .memusage = &lzma_lzma_encoder_memusage,
+ .block_size = NULL, // Not needed for LZMA1
+ .props_size_get = NULL,
+ .props_size_fixed = 5,
+ .props_encode = &lzma_lzma_props_encode,
+ },
+ {
+ .id = LZMA_FILTER_LZMA1EXT,
+ .init = &lzma_lzma_encoder_init,
+ .memusage = &lzma_lzma_encoder_memusage,
+ .block_size = NULL, // Not needed for LZMA1
+ .props_size_get = NULL,
+ .props_size_fixed = 5,
+ .props_encode = &lzma_lzma_props_encode,
+ },
+#endif
+#ifdef HAVE_ENCODER_LZMA2
+ {
+ .id = LZMA_FILTER_LZMA2,
+ .init = &lzma_lzma2_encoder_init,
+ .memusage = &lzma_lzma2_encoder_memusage,
+ .block_size = &lzma_lzma2_block_size,
+ .props_size_get = NULL,
+ .props_size_fixed = 1,
+ .props_encode = &lzma_lzma2_props_encode,
+ },
+#endif
+#ifdef HAVE_ENCODER_X86
+ {
+ .id = LZMA_FILTER_X86,
+ .init = &lzma_simple_x86_encoder_init,
+ .memusage = NULL,
+ .block_size = NULL,
+ .props_size_get = &lzma_simple_props_size,
+ .props_encode = &lzma_simple_props_encode,
+ },
+#endif
+#ifdef HAVE_ENCODER_POWERPC
+ {
+ .id = LZMA_FILTER_POWERPC,
+ .init = &lzma_simple_powerpc_encoder_init,
+ .memusage = NULL,
+ .block_size = NULL,
+ .props_size_get = &lzma_simple_props_size,
+ .props_encode = &lzma_simple_props_encode,
+ },
+#endif
+#ifdef HAVE_ENCODER_IA64
+ {
+ .id = LZMA_FILTER_IA64,
+ .init = &lzma_simple_ia64_encoder_init,
+ .memusage = NULL,
+ .block_size = NULL,
+ .props_size_get = &lzma_simple_props_size,
+ .props_encode = &lzma_simple_props_encode,
+ },
+#endif
+#ifdef HAVE_ENCODER_ARM
+ {
+ .id = LZMA_FILTER_ARM,
+ .init = &lzma_simple_arm_encoder_init,
+ .memusage = NULL,
+ .block_size = NULL,
+ .props_size_get = &lzma_simple_props_size,
+ .props_encode = &lzma_simple_props_encode,
+ },
+#endif
+#ifdef HAVE_ENCODER_ARMTHUMB
+ {
+ .id = LZMA_FILTER_ARMTHUMB,
+ .init = &lzma_simple_armthumb_encoder_init,
+ .memusage = NULL,
+ .block_size = NULL,
+ .props_size_get = &lzma_simple_props_size,
+ .props_encode = &lzma_simple_props_encode,
+ },
+#endif
+#ifdef HAVE_ENCODER_ARM64
+ {
+ .id = LZMA_FILTER_ARM64,
+ .init = &lzma_simple_arm64_encoder_init,
+ .memusage = NULL,
+ .block_size = NULL,
+ .props_size_get = &lzma_simple_props_size,
+ .props_encode = &lzma_simple_props_encode,
+ },
+#endif
+#ifdef HAVE_ENCODER_SPARC
+ {
+ .id = LZMA_FILTER_SPARC,
+ .init = &lzma_simple_sparc_encoder_init,
+ .memusage = NULL,
+ .block_size = NULL,
+ .props_size_get = &lzma_simple_props_size,
+ .props_encode = &lzma_simple_props_encode,
+ },
+#endif
+#ifdef HAVE_ENCODER_DELTA
+ {
+ .id = LZMA_FILTER_DELTA,
+ .init = &lzma_delta_encoder_init,
+ .memusage = &lzma_delta_coder_memusage,
+ .block_size = NULL,
+ .props_size_get = NULL,
+ .props_size_fixed = 1,
+ .props_encode = &lzma_delta_props_encode,
+ },
+#endif
+};
+
+
+/// Returns the encoders[] entry whose Filter ID equals id, or NULL if
+/// the table has no such entry.
+static const lzma_filter_encoder *
+encoder_find(lzma_vli id)
+{
+	size_t i = 0;
+
+	while (i < ARRAY_SIZE(encoders)) {
+		if (encoders[i].id == id)
+			return &encoders[i];
+
+		++i;
+	}
+
+	return NULL;
+}
+
+
+/// \brief Tell whether an encoder exists for the given Filter ID
+///
+/// \return True if encoder_find() has an entry for id, false otherwise.
+extern LZMA_API(lzma_bool)
+lzma_filter_encoder_is_supported(lzma_vli id)
+{
+ return encoder_find(id) != NULL;
+}
+
+
+/// \brief Pass a new filter chain to the encoder behind strm
+///
+/// \return - LZMA_PROG_ERROR: The coder has no update function.
+/// - LZMA_OPTIONS_ERROR: The new chain didn't pass
+/// lzma_raw_encoder_memusage().
+/// - Otherwise whatever the coder's update function returns.
+extern LZMA_API(lzma_ret)
+lzma_filters_update(lzma_stream *strm, const lzma_filter *filters)
+{
+	if (strm->internal->next.update == NULL)
+		return LZMA_PROG_ERROR;
+
+	// The memory usage calculation doubles as validation of
+	// the filter chain.
+	if (lzma_raw_encoder_memusage(filters) == UINT64_MAX)
+		return LZMA_OPTIONS_ERROR;
+
+	// Count the filters. The validation above ensures that there
+	// is at least one.
+	size_t count;
+	for (count = 1; filters[count].id != LZMA_VLI_UNKNOWN; ++count)
+		continue;
+
+	// The encoder keeps its filter chain in reverse order, but some
+	// things still want the chain in the normal order. Build the
+	// reversed copy so that both can be provided.
+	lzma_filter reversed[LZMA_FILTERS_MAX + 1];
+	reversed[count].id = LZMA_VLI_UNKNOWN;
+
+	for (size_t src = 0, dst = count; src < count; ++src)
+		reversed[--dst] = filters[src];
+
+	return strm->internal->next.update(strm->internal->next.coder,
+			strm->allocator, filters, reversed);
+}
+
+
+/// \brief Initialize next as a raw encoder for the given filter chain
+///
+/// Forwards to lzma_raw_coder_init() with encoder_find() as the lookup
+/// function and with is_encoder set to true.
+extern lzma_ret
+lzma_raw_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter *options)
+{
+ // encoder_find() returns a lzma_filter_encoder pointer, so it is
+ // cast to the generic lookup type. NOTE(review): this relies on
+ // lzma_filter_encoder beginning with the members of
+ // lzma_filter_coder -- keep the two structures in sync.
+ return lzma_raw_coder_init(next, allocator,
+ options, (lzma_filter_find)(&encoder_find), true);
+}
+
+
+/// \brief Set up strm as a raw encoder for the given filter chain
+///
+/// Initializes strm through lzma_raw_coder_init() using the encoder
+/// table, and enables the LZMA_RUN, LZMA_SYNC_FLUSH and LZMA_FINISH
+/// actions.
+///
+/// \return LZMA_OK once the actions have been enabled.
+/// NOTE(review): lzma_next_strm_init() is a macro defined outside
+/// this file; presumably it returns early with an error code if
+/// the initialization fails -- confirm in common.h.
+extern LZMA_API(lzma_ret)
+lzma_raw_encoder(lzma_stream *strm, const lzma_filter *options)
+{
+ lzma_next_strm_init(lzma_raw_coder_init, strm, options,
+ (lzma_filter_find)(&encoder_find), true);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+ strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
+
+
+/// \brief Estimate the memory usage of a raw encoder
+///
+/// \return The value of lzma_raw_coder_memusage() computed with the
+/// encoder table as the lookup source.
+extern LZMA_API(uint64_t)
+lzma_raw_encoder_memusage(const lzma_filter *filters)
+{
+ return lzma_raw_coder_memusage(
+ (lzma_filter_find)(&encoder_find), filters);
+}
+
+
+/// \brief Get the recommended Block size for multithreaded encoding
+///
+/// Asks every filter in the chain that has a block_size function for
+/// its recommendation and returns the largest one.
+///
+/// \param filters Filter chain terminated by LZMA_VLI_UNKNOWN
+///
+/// \return The largest recommended size, or 0 if a filter reported 0,
+/// if no filter in the chain has a block_size function, or if
+/// the chain contains a Filter ID without an encoder.
+extern uint64_t
+lzma_mt_block_size(const lzma_filter *filters)
+{
+	uint64_t max = 0;
+
+	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
+		const lzma_filter_encoder *const fe
+				= encoder_find(filters[i].id);
+
+		// Don't dereference a missing entry. An unsupported
+		// Filter ID is reported with 0 like the other cases
+		// where no block size can be given.
+		if (fe == NULL)
+			return 0;
+
+		if (fe->block_size == NULL)
+			continue;
+
+		const uint64_t size = fe->block_size(filters[i].options);
+		if (size == 0)
+			return 0;
+
+		if (size > max)
+			max = size;
+	}
+
+	return max;
+}
+
+
+/// \brief Get the size of the Filter Properties field of a filter
+///
+/// \return - LZMA_OK: *size was set from props_size_fixed.
+/// - LZMA_OPTIONS_ERROR: No encoder for a Filter ID that is
+/// a proper VLI.
+/// - LZMA_PROG_ERROR: No encoder and the Filter ID is larger
+/// than LZMA_VLI_MAX.
+/// - Otherwise whatever the filter's props_size_get() returns.
+extern LZMA_API(lzma_ret)
+lzma_properties_size(uint32_t *size, const lzma_filter *filter)
+{
+	const lzma_filter_encoder *const entry = encoder_find(filter->id);
+
+	if (entry != NULL) {
+		if (entry->props_size_get != NULL)
+			return entry->props_size_get(size, filter->options);
+
+		// There's no props_size_get() function, so the size
+		// is the fixed one.
+		*size = entry->props_size_fixed;
+		return LZMA_OK;
+	}
+
+	// Unknown filter. A Filter ID that is a proper VLI may simply
+	// belong to a filter whose support wasn't compiled in, so it
+	// gets LZMA_OPTIONS_ERROR instead of LZMA_PROG_ERROR.
+	if (filter->id <= LZMA_VLI_MAX)
+		return LZMA_OPTIONS_ERROR;
+
+	return LZMA_PROG_ERROR;
+}
+
+
+/// \brief Encode the Filter Properties of a filter into props
+///
+/// \return - LZMA_PROG_ERROR: No encoder for filter->id.
+/// - LZMA_OK: The filter has no props_encode() function, so
+/// nothing was written.
+/// - Otherwise whatever the filter's props_encode() returns.
+extern LZMA_API(lzma_ret)
+lzma_properties_encode(const lzma_filter *filter, uint8_t *props)
+{
+	const lzma_filter_encoder *const entry = encoder_find(filter->id);
+	if (entry == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (entry->props_encode != NULL)
+		return entry->props_encode(filter->options, props);
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/filter_encoder.h b/src/liblzma/common/filter_encoder.h
new file mode 100644
index 0000000..f1d5683
--- /dev/null
+++ b/src/liblzma/common/filter_encoder.h
@@ -0,0 +1,27 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_encoder.h
+/// \brief Filter ID mapping to filter-specific functions
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_FILTER_ENCODER_H
+#define LZMA_FILTER_ENCODER_H
+
+#include "common.h"
+
+
+/// \brief Get the recommended Block size for multithreaded encoding
+///
+/// \return The largest size recommended by the filters in the chain.
+/// Zero is returned if a filter reports 0 or if no filter in
+/// the chain has a block size function.
+//
+// FIXME: Might become a part of the public API.
+extern uint64_t lzma_mt_block_size(const lzma_filter *filters);
+
+
+/// \brief Initialize next as a raw encoder for the given filter chain
+///
+/// \param filters Filter chain terminated by LZMA_VLI_UNKNOWN
+extern lzma_ret lzma_raw_encoder_init(
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter *filters);
+
+#endif
diff --git a/src/liblzma/common/filter_flags_decoder.c b/src/liblzma/common/filter_flags_decoder.c
new file mode 100644
index 0000000..ddfb085
--- /dev/null
+++ b/src/liblzma/common/filter_flags_decoder.c
@@ -0,0 +1,46 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_flags_decoder.c
+/// \brief Decodes a Filter Flags field
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_decoder.h"
+
+
+/// \brief      Decode one Filter Flags field
+///
+/// Reads, in this order, the Filter ID, the Size of Properties, and the
+/// Filter Properties from in[] starting at *in_pos.
+///
+/// \return     The first error from lzma_vli_decode(), LZMA_DATA_ERROR if
+///             the Filter ID is in the reserved range or if fewer than
+///             Size of Properties bytes remain in in[], otherwise the
+///             return value of lzma_properties_decode().
+extern LZMA_API(lzma_ret)
+lzma_filter_flags_decode(
+		lzma_filter *filter, const lzma_allocator *allocator,
+		const uint8_t *in, size_t *in_pos, size_t in_size)
+{
+	// With options cleared up front the caller can free it safely
+	// no matter where decoding stops.
+	filter->options = NULL;
+
+	// Filter ID
+	return_if_error(lzma_vli_decode(&filter->id, NULL,
+			in, in_pos, in_size));
+
+	if (filter->id >= LZMA_FILTER_RESERVED_START)
+		return LZMA_DATA_ERROR;
+
+	// Size of Properties
+	lzma_vli props_size;
+	return_if_error(lzma_vli_decode(&props_size, NULL,
+			in, in_pos, in_size));
+
+	// Filter Properties: they have to fit in the remaining input.
+	const size_t in_avail = in_size - *in_pos;
+	if (in_avail < props_size)
+		return LZMA_DATA_ERROR;
+
+	const uint8_t *const props = in + *in_pos;
+	const lzma_ret decode_ret = lzma_properties_decode(
+			filter, allocator, props, props_size);
+
+	// The input position is advanced even if decoding the
+	// properties failed.
+	*in_pos += props_size;
+
+	return decode_ret;
+}
diff --git a/src/liblzma/common/filter_flags_encoder.c b/src/liblzma/common/filter_flags_encoder.c
new file mode 100644
index 0000000..b57b9fd
--- /dev/null
+++ b/src/liblzma/common/filter_flags_encoder.c
@@ -0,0 +1,56 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file filter_flags_encoder.c
+/// \brief Encodes a Filter Flags field
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_encoder.h"
+
+
+/// \brief      Calculate the encoded size of a Filter Flags field
+///
+/// On success *size is the size of the Filter Properties plus the sizes
+/// of the VLIs holding the Filter ID and the Size of Properties.
+///
+/// \return     LZMA_PROG_ERROR for a Filter ID in the reserved range,
+///             an error from lzma_properties_size(), or LZMA_OK.
+extern LZMA_API(lzma_ret)
+lzma_filter_flags_size(uint32_t *size, const lzma_filter *filter)
+{
+	if (filter->id >= LZMA_FILTER_RESERVED_START)
+		return LZMA_PROG_ERROR;
+
+	// This stores the size of the Filter Properties in *size.
+	return_if_error(lzma_properties_size(size, filter));
+
+	const uint32_t props_size = *size;
+	*size = props_size + (lzma_vli_size(filter->id)
+			+ lzma_vli_size(props_size));
+
+	return LZMA_OK;
+}
+
+
+/// \brief      Encode one Filter Flags field
+///
+/// Writes the Filter ID, the Size of Properties, and the Filter
+/// Properties to out[] starting at *out_pos.
+///
+/// \return     LZMA_PROG_ERROR for a Filter ID in the reserved range or
+///             when the Filter Properties don't fit in out[], an error
+///             from the called encoding functions, or LZMA_OK.
+extern LZMA_API(lzma_ret)
+lzma_filter_flags_encode(const lzma_filter *filter,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	if (filter->id >= LZMA_FILTER_RESERVED_START)
+		return LZMA_PROG_ERROR;
+
+	// Filter ID
+	return_if_error(lzma_vli_encode(filter->id, NULL,
+			out, out_pos, out_size));
+
+	// Size of Properties
+	uint32_t props_size;
+	return_if_error(lzma_properties_size(&props_size, filter));
+	return_if_error(lzma_vli_encode(props_size, NULL,
+			out, out_pos, out_size));
+
+	// Filter Properties: make sure they fit before writing them.
+	const size_t out_avail = out_size - *out_pos;
+	if (out_avail < props_size)
+		return LZMA_PROG_ERROR;
+
+	uint8_t *const props = out + *out_pos;
+	return_if_error(lzma_properties_encode(filter, props));
+
+	*out_pos += props_size;
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/hardware_cputhreads.c b/src/liblzma/common/hardware_cputhreads.c
new file mode 100644
index 0000000..5d246d2
--- /dev/null
+++ b/src/liblzma/common/hardware_cputhreads.c
@@ -0,0 +1,34 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file hardware_cputhreads.c
+/// \brief Get the number of CPU threads or cores
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+#include "tuklib_cpucores.h"
+
+
+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// This is for compatibility with binaries linked against liblzma that
+// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
+//
+// lzma_cputhreads_522 is declared as an alias of lzma_cputhreads_52, so
+// both versioned symbols end up at the single definition below.
+LZMA_SYMVER_API("lzma_cputhreads@XZ_5.2.2",
+ uint32_t, lzma_cputhreads_522)(void) lzma_nothrow
+ __attribute__((__alias__("lzma_cputhreads_52")));
+
+LZMA_SYMVER_API("lzma_cputhreads@@XZ_5.2",
+ uint32_t, lzma_cputhreads_52)(void) lzma_nothrow;
+
+// From here on the function defined below is named lzma_cputhreads_52.
+#define lzma_cputhreads lzma_cputhreads_52
+#endif
+/// Return the value reported by tuklib_cpucores().
+extern LZMA_API(uint32_t)
+lzma_cputhreads(void)
+{
+ return tuklib_cpucores();
+}
diff --git a/src/liblzma/common/hardware_physmem.c b/src/liblzma/common/hardware_physmem.c
new file mode 100644
index 0000000..a2bbbe2
--- /dev/null
+++ b/src/liblzma/common/hardware_physmem.c
@@ -0,0 +1,25 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file hardware_physmem.c
+/// \brief Get the total amount of physical memory (RAM)
+//
+// Author: Jonathan Nieder
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+#include "tuklib_physmem.h"
+
+
+/// Return the value reported by tuklib_physmem().
+extern LZMA_API(uint64_t)
+lzma_physmem(void)
+{
+	// Wrapping tuklib_physmem() here is simpler than arranging
+	// suitable symbol visibility for the tuklib modules themselves.
+	const uint64_t physmem = tuklib_physmem();
+	return physmem;
+}
diff --git a/src/liblzma/common/index.c b/src/liblzma/common/index.c
new file mode 100644
index 0000000..8a35f43
--- /dev/null
+++ b/src/liblzma/common/index.c
@@ -0,0 +1,1269 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file index.c
+/// \brief Handling of .xz Indexes and some other Stream information
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "index.h"
+#include "stream_flags_common.h"
+
+
+/// \brief How many Records to allocate at once
+///
+/// This should be big enough to avoid making lots of tiny allocations
+/// but small enough to avoid too much unused memory at once.
+#define INDEX_GROUP_SIZE 512
+
+
+/// \brief How many Records can be allocated at once at maximum
+///
+/// This is the biggest Record count for which the allocation size
+/// sizeof(index_group) + count * sizeof(index_record)
+/// can still be calculated in size_t without wrapping around.
+#define PREALLOC_MAX ((SIZE_MAX - sizeof(index_group)) / sizeof(index_record))
+
+
+/// \brief Base structure for index_stream and index_group structures
+///
+/// Both of those structures have this as their first member, which lets
+/// the tree code handle them through index_tree_node pointers.
+typedef struct index_tree_node_s index_tree_node;
+struct index_tree_node_s {
+ /// Uncompressed start offset of this Stream (relative to the
+ /// beginning of the file) or Block (relative to the beginning
+ /// of the Stream)
+ lzma_vli uncompressed_base;
+
+ /// Compressed start offset of this Stream or Block
+ lzma_vli compressed_base;
+
+ /// Parent node; NULL for the root node of the tree
+ index_tree_node *parent;
+ /// Left child; NULL if there is none
+ index_tree_node *left;
+ /// Right child; NULL if there is none
+ index_tree_node *right;
+};
+
+
+/// \brief AVL tree to hold index_stream or index_group structures
+///
+/// Nodes are only ever added at the right edge with index_tree_append().
+typedef struct {
+ /// Root node
+ index_tree_node *root;
+
+ /// Leftmost node. Since the tree will be filled sequentially,
+ /// this won't change after the first node has been added to
+ /// the tree.
+ index_tree_node *leftmost;
+
+ /// The rightmost node in the tree. Since the tree is filled
+ /// sequentially, this is always the node where to add the new data.
+ index_tree_node *rightmost;
+
+ /// Number of nodes in the tree. index_tree_append() also uses
+ /// this to decide when and where to rebalance the tree.
+ uint32_t count;
+
+} index_tree;
+
+
+/// A single Record. Both fields are cumulative sums relative to the
+/// beginning of the Stream; see the comment of index_group.records.
+typedef struct {
+ /// Sum of the Uncompressed Sizes of the Blocks up to and
+ /// including this one
+ lzma_vli uncompressed_sum;
+ /// Cumulative Unpadded Size: the previous sum rounded up to
+ /// a multiple of four plus the Unpadded Size of this Block
+ lzma_vli unpadded_sum;
+} index_record;
+
+
+/// A group of Records that is allocated as a single memory block.
+typedef struct {
+ /// Every Record group is part of index_stream.groups tree.
+ index_tree_node node;
+
+ /// Number of Blocks in this Stream before this group.
+ lzma_vli number_base;
+
+ /// Number of Records that can be put in records[].
+ size_t allocated;
+
+ /// Index of the last Record in use.
+ size_t last;
+
+ /// The sizes in this array are stored as cumulative sums relative
+ /// to the beginning of the Stream. This makes it possible to
+ /// use binary search in lzma_index_locate().
+ ///
+ /// Note that the cumulative summing is done specially for
+ /// unpadded_sum: The previous value is rounded up to the next
+ /// multiple of four before adding the Unpadded Size of the new
+ /// Block. The total encoded size of the Blocks in the Stream
+ /// is records[last].unpadded_sum in the last Record group of
+ /// the Stream.
+ ///
+ /// For example, if the Unpadded Sizes are 39, 57, and 81, the
+ /// stored values are 39, 97 (40 + 57), and 181 (100 + 81).
+ /// The total encoded size of these Blocks is 184.
+ ///
+ /// This is a flexible array, because it makes it easy to optimize
+ /// memory usage in case someone concatenates many Streams that
+ /// have only one or few Blocks.
+ index_record records[];
+
+} index_group;
+
+
+/// Information about one Stream and the tree of its Record groups.
+typedef struct {
+ /// Every index_stream is a node in the tree of Streams.
+ index_tree_node node;
+
+ /// Number of this Stream (first one is 1)
+ uint32_t number;
+
+ /// Total number of Blocks before this Stream
+ lzma_vli block_number_base;
+
+ /// Record groups of this Stream are stored in a tree.
+ /// It's a T-tree with AVL-tree balancing. There are
+ /// INDEX_GROUP_SIZE Records per node by default.
+ /// This keeps the number of memory allocations reasonable
+ /// and finding a Record is fast.
+ index_tree groups;
+
+ /// Number of Records in this Stream
+ lzma_vli record_count;
+
+ /// Size of the List of Records field in this Stream. This is used
+ /// together with record_count to calculate the size of the Index
+ /// field and thus the total size of the Stream.
+ lzma_vli index_list_size;
+
+ /// Stream Flags of this Stream. This is meaningful only if
+ /// the Stream Flags have been told to us with
+ /// lzma_index_stream_flags(). Initially stream_flags.version is
+ /// set to UINT32_MAX to indicate that the Stream Flags are unknown.
+ lzma_stream_flags stream_flags;
+
+ /// Amount of Stream Padding after this Stream. This defaults to
+ /// zero and can be set with lzma_index_stream_padding().
+ lzma_vli stream_padding;
+
+} index_stream;
+
+
+/// The base structure of an index: the tree of Streams plus totals
+/// that cover all the Streams in the tree.
+struct lzma_index_s {
+ /// AVL-tree containing the Stream(s). Often there is just one
+ /// Stream, but using a tree keeps lookups fast even when there
+ /// are many concatenated Streams.
+ index_tree streams;
+
+ /// Uncompressed size of all the Blocks in the Stream(s)
+ lzma_vli uncompressed_size;
+
+ /// Total size of all the Blocks in the Stream(s)
+ lzma_vli total_size;
+
+ /// Total number of Records in all Streams in this lzma_index
+ lzma_vli record_count;
+
+ /// Size of the List of Records field if all the Streams in this
+ /// lzma_index were packed into a single Stream (makes it simpler to
+ /// take many .xz files and combine them into a single Stream).
+ ///
+ /// This value together with record_count is needed to calculate
+ /// Backward Size that is stored into Stream Footer.
+ lzma_vli index_list_size;
+
+ /// How many Records to allocate at once in lzma_index_append().
+ /// This defaults to INDEX_GROUP_SIZE but can be overridden with
+ /// lzma_index_prealloc().
+ size_t prealloc;
+
+ /// Bitmask indicating what integrity check types have been used
+ /// as set by lzma_index_stream_flags(). The bit of the last Stream
+ /// is not included here, since it is possible to change it by
+ /// calling lzma_index_stream_flags() again.
+ uint32_t checks;
+};
+
+
+/// Reset a tree to the empty state: no nodes and a zero node count.
+static void
+index_tree_init(index_tree *tree)
+{
+	tree->count = 0;
+	tree->root = tree->leftmost = tree->rightmost = NULL;
+}
+
+
+/// Helper for index_tree_end(): free the subtree rooted at node.
+/// The children are freed first and the node itself last.
+static void
+index_tree_node_end(index_tree_node *node, const lzma_allocator *allocator,
+		void (*free_func)(void *node, const lzma_allocator *allocator))
+{
+	// Recursion should be fine because the tree won't ever be very
+	// huge; already 20 levels is likely quite a lot in practice.
+	index_tree_node *const left_child = node->left;
+	index_tree_node *const right_child = node->right;
+
+	if (left_child != NULL)
+		index_tree_node_end(left_child, allocator, free_func);
+
+	if (right_child != NULL)
+		index_tree_node_end(right_child, allocator, free_func);
+
+	free_func(node, allocator);
+}
+
+
+/// Free all the nodes of a tree; the index_tree structure itself is
+/// left untouched. free_func is called for every node and it is either
+/// &lzma_free or &index_stream_end. The latter frees the Record groups
+/// of an index_stream before freeing the index_stream itself.
+static void
+index_tree_end(index_tree *tree, const lzma_allocator *allocator,
+		void (*free_func)(void *node, const lzma_allocator *allocator))
+{
+	assert(free_func != NULL);
+
+	// An empty tree has nothing to free.
+	index_tree_node *const root = tree->root;
+	if (root == NULL)
+		return;
+
+	index_tree_node_end(root, allocator, free_func);
+}
+
+
+/// Add a new node to the tree. node->uncompressed_base and
+/// node->compressed_base must have been set by the caller already.
+///
+/// The node becomes the new rightmost node of the tree. Its parent,
+/// left, and right pointers are overwritten here, so the caller doesn't
+/// need to initialize them.
+static void
+index_tree_append(index_tree *tree, index_tree_node *node)
+{
+ node->parent = tree->rightmost;
+ node->left = NULL;
+ node->right = NULL;
+
+ ++tree->count;
+
+ // Handle the special case of adding the first node.
+ if (tree->root == NULL) {
+ tree->root = node;
+ tree->leftmost = node;
+ tree->rightmost = node;
+ return;
+ }
+
+ // The tree is always filled sequentially.
+ assert(tree->rightmost->uncompressed_base <= node->uncompressed_base);
+ assert(tree->rightmost->compressed_base < node->compressed_base);
+
+ // Add the new node after the rightmost node. It's the correct
+ // place due to the reason above.
+ tree->rightmost->right = node;
+ tree->rightmost = node;
+
+ // Balance the AVL-tree if needed. We don't need to keep the balance
+ // factors in nodes, because we always fill the tree sequentially,
+ // and thus know the state of the tree just by looking at the node
+ // count. From the node count we can calculate how many steps to go
+ // up in the tree to find the rotation root.
+ //
+ // The expression below toggles one bit of the node count.
+ // NOTE(review): bsr32() presumably returns the index of the highest
+ // set bit, which would make "up" zero exactly when the node count
+ // is a power of two (no rotation needed) - confirm.
+ uint32_t up = tree->count ^ (UINT32_C(1) << bsr32(tree->count));
+ if (up != 0) {
+ // Locate the root node for the rotation.
+ // NOTE(review): ctz32() presumably counts the trailing zero
+ // bits of the node count - confirm.
+ up = ctz32(tree->count) + 2;
+ do {
+ node = node->parent;
+ } while (--up > 0);
+
+ // Rotate left using node as the rotation root.
+ index_tree_node *pivot = node->right;
+
+ // The pivot takes the place of node under node's parent
+ // (or becomes the new root of the whole tree).
+ if (node->parent == NULL) {
+ tree->root = pivot;
+ } else {
+ assert(node->parent->right == node);
+ node->parent->right = pivot;
+ }
+
+ pivot->parent = node->parent;
+
+ // The left subtree of the pivot becomes the right subtree
+ // of node.
+ node->right = pivot->left;
+ if (node->right != NULL)
+ node->right->parent = node;
+
+ // Finally node becomes the left child of the pivot.
+ pivot->left = node;
+ node->parent = pivot;
+ }
+
+ return;
+}
+
+
+/// Return the in-order successor of node, or NULL if node is the last
+/// node of the tree.
+static void *
+index_tree_next(const index_tree_node *node)
+{
+	// With a right subtree, the successor is its leftmost node.
+	const index_tree_node *succ = node->right;
+	if (succ != NULL) {
+		while (succ->left != NULL)
+			succ = succ->left;
+
+		return (void *)(succ);
+	}
+
+	// Otherwise climb up for as long as we arrive from a right child.
+	// The parent reached after that is the successor (NULL if we
+	// climbed past the root).
+	const index_tree_node *up = node->parent;
+	while (up != NULL && up->right == node) {
+		node = up;
+		up = up->parent;
+	}
+
+	return (void *)(up);
+}
+
+
+/// Find the node that contains the given uncompressed offset. The caller
+/// must check that target isn't bigger than the uncompressed size of the
+/// tree; if it is, the last node is returned anyway. NULL is returned
+/// only if the tree is empty.
+static void *
+index_tree_locate(const index_tree *tree, lzma_vli target)
+{
+	assert(tree->leftmost == NULL
+			|| tree->leftmost->uncompressed_base == 0);
+
+	// Many consecutive nodes can share the same uncompressed_base and
+	// the rightmost of them is the one wanted. So whenever a node is
+	// a candidate, remember it and keep looking to the right.
+	const index_tree_node *best = NULL;
+	const index_tree_node *cur = tree->root;
+
+	while (cur != NULL) {
+		if (cur->uncompressed_base <= target) {
+			best = cur;
+			cur = cur->right;
+		} else {
+			cur = cur->left;
+		}
+	}
+
+	return (void *)(best);
+}
+
+
+/// Allocate a new index_stream and initialize it with the given base
+/// offsets and numbers. The new Stream has no Records, its Stream Flags
+/// are marked as unknown (version == UINT32_MAX), and it has no Stream
+/// Padding. Returns NULL if memory allocation fails.
+static index_stream *
+index_stream_init(lzma_vli compressed_base, lzma_vli uncompressed_base,
+		uint32_t stream_number, lzma_vli block_number_base,
+		const lzma_allocator *allocator)
+{
+	index_stream *stream = lzma_alloc(sizeof(index_stream), allocator);
+	if (stream == NULL)
+		return NULL;
+
+	// The tree node: not linked to any other node yet.
+	index_tree_node *const node = &stream->node;
+	node->parent = NULL;
+	node->left = NULL;
+	node->right = NULL;
+	node->compressed_base = compressed_base;
+	node->uncompressed_base = uncompressed_base;
+
+	// Position of this Stream among the other Streams
+	stream->number = stream_number;
+	stream->block_number_base = block_number_base;
+
+	// No Records yet
+	index_tree_init(&stream->groups);
+	stream->record_count = 0;
+	stream->index_list_size = 0;
+
+	stream->stream_padding = 0;
+	stream->stream_flags.version = UINT32_MAX;
+
+	return stream;
+}
+
+
+/// Free an index_stream: first all of its Record groups and then the
+/// index_stream itself. The signature matches the free_func argument
+/// of index_tree_end().
+static void
+index_stream_end(void *node, const lzma_allocator *allocator)
+{
+	index_stream *stream = node;
+
+	index_tree_end(&stream->groups, allocator, &lzma_free);
+	lzma_free(stream, allocator);
+}
+
+
+/// Allocate the base structure of an index and initialize it to hold
+/// no Streams at all. Returns NULL if memory allocation fails.
+static lzma_index *
+index_init_plain(const lzma_allocator *allocator)
+{
+	lzma_index *index = lzma_alloc(sizeof(lzma_index), allocator);
+	if (index == NULL)
+		return NULL;
+
+	index_tree_init(&index->streams);
+
+	// Nothing has been added yet so all the totals are zero.
+	index->uncompressed_size = 0;
+	index->total_size = 0;
+	index->record_count = 0;
+	index->index_list_size = 0;
+
+	index->prealloc = INDEX_GROUP_SIZE;
+	index->checks = 0;
+
+	return index;
+}
+
+
+/// Allocate a new index that contains one empty Stream (Stream
+/// number 1 at offset zero). Returns NULL if memory allocation fails.
+extern LZMA_API(lzma_index *)
+lzma_index_init(const lzma_allocator *allocator)
+{
+	lzma_index *index = index_init_plain(allocator);
+	if (index == NULL)
+		return NULL;
+
+	index_stream *first_stream = index_stream_init(
+			0, 0, 1, 0, allocator);
+	if (first_stream == NULL) {
+		// Don't leak the base structure.
+		lzma_free(index, allocator);
+		return NULL;
+	}
+
+	index_tree_append(&index->streams, &first_stream->node);
+
+	return index;
+}
+
+
+/// Free an index with all of its Streams and Record groups.
+/// Passing NULL as the index is allowed and does nothing.
+extern LZMA_API(void)
+lzma_index_end(lzma_index *i, const lzma_allocator *allocator)
+{
+	// NOTE: If you modify this function, check also the bottom
+	// of lzma_index_cat().
+	if (i == NULL)
+		return;
+
+	index_tree_end(&i->streams, allocator, &index_stream_end);
+	lzma_free(i, allocator);
+}
+
+
+/// Set how many Records the next group allocated by lzma_index_append()
+/// has room for. Values bigger than PREALLOC_MAX are silently reduced
+/// to PREALLOC_MAX.
+extern void
+lzma_index_prealloc(lzma_index *i, lzma_vli records)
+{
+	const lzma_vli clamped = records > PREALLOC_MAX
+			? PREALLOC_MAX : records;
+
+	i->prealloc = (size_t)(clamped);
+}
+
+
+/// Estimate how much memory an index with the given number of Streams
+/// and Blocks needs. Returns UINT64_MAX if the arguments are invalid
+/// (streams is zero or bigger than UINT32_MAX, or blocks is bigger than
+/// LZMA_VLI_MAX) or if the result wouldn't fit in uint64_t.
+extern LZMA_API(uint64_t)
+lzma_index_memusage(lzma_vli streams, lzma_vli blocks)
+{
+ // This calculates an upper bound that is only a little bit
+ // bigger than the exact maximum memory usage with the given
+ // parameters.
+
+ // Typical malloc() overhead is 2 * sizeof(void *) but we take
+ // a little bit extra just in case. Using LZMA_MEMUSAGE_BASE
+ // instead would give too inaccurate estimate.
+ const size_t alloc_overhead = 4 * sizeof(void *);
+
+ // Amount of memory needed for each Stream base structures.
+ // We assume that every Stream has at least one Block and
+ // thus at least one group.
+ const size_t stream_base = sizeof(index_stream)
+ + sizeof(index_group) + 2 * alloc_overhead;
+
+ // Amount of memory needed per group.
+ const size_t group_base = sizeof(index_group)
+ + INDEX_GROUP_SIZE * sizeof(index_record)
+ + alloc_overhead;
+
+ // Number of groups. There may actually be more, but that overhead
+ // has been taken into account in stream_base already.
+ const lzma_vli groups
+ = (blocks + INDEX_GROUP_SIZE - 1) / INDEX_GROUP_SIZE;
+
+ // Memory used by index_stream and index_group structures.
+ // These multiplications may wrap around; the results are used
+ // only after the checks below have shown that they didn't.
+ const uint64_t streams_mem = streams * stream_base;
+ const uint64_t groups_mem = groups * group_base;
+
+ // Memory used by the base structure.
+ const uint64_t index_base = sizeof(lzma_index) + alloc_overhead;
+
+ // Validate the arguments and catch integer overflows.
+ // Maximum number of Streams is "only" UINT32_MAX, because
+ // that limit is used by the tree containing the Streams.
+ const uint64_t limit = UINT64_MAX - index_base;
+ if (streams == 0 || streams > UINT32_MAX || blocks > LZMA_VLI_MAX
+ || streams > limit / stream_base
+ || groups > limit / group_base
+ || limit - streams_mem < groups_mem)
+ return UINT64_MAX;
+
+ return index_base + streams_mem + groups_mem;
+}
+
+
+/// Estimate the memory usage of the given index. This is the value of
+/// lzma_index_memusage() for the Stream and Record counts of the index.
+extern LZMA_API(uint64_t)
+lzma_index_memused(const lzma_index *i)
+{
+	const lzma_vli stream_count = i->streams.count;
+	const lzma_vli block_count = i->record_count;
+
+	return lzma_index_memusage(stream_count, block_count);
+}
+
+
+/// Return the total number of Records in all the Streams of the index.
+extern LZMA_API(lzma_vli)
+lzma_index_block_count(const lzma_index *i)
+{
+	const lzma_vli blocks = i->record_count;
+	return blocks;
+}
+
+
+/// Return the number of Streams, that is, the number of nodes in the
+/// tree of Streams.
+extern LZMA_API(lzma_vli)
+lzma_index_stream_count(const lzma_index *i)
+{
+	const uint32_t streams = i->streams.count;
+	return streams;
+}
+
+
+/// Return the value of index_size() for the combined Record count and
+/// List of Records size of all the Streams of the index.
+extern LZMA_API(lzma_vli)
+lzma_index_size(const lzma_index *i)
+{
+	const lzma_vli records = i->record_count;
+	const lzma_vli list_size = i->index_list_size;
+
+	return index_size(records, list_size);
+}
+
+
+/// Return the total size of all the Blocks in the Stream(s).
+extern LZMA_API(lzma_vli)
+lzma_index_total_size(const lzma_index *i)
+{
+	const lzma_vli blocks_size = i->total_size;
+	return blocks_size;
+}
+
+
+/// Return the size of a Stream that holds all the Blocks of the index:
+/// Stream Header + Blocks + Index + Stream Footer.
+extern LZMA_API(lzma_vli)
+lzma_index_stream_size(const lzma_index *i)
+{
+	const lzma_vli index_field_size = index_size(
+			i->record_count, i->index_list_size);
+
+	// LZMA_STREAM_HEADER_SIZE is used for both the Stream Header
+	// and the Stream Footer.
+	lzma_vli stream_size = LZMA_STREAM_HEADER_SIZE;
+	stream_size += i->total_size;
+	stream_size += index_field_size;
+	stream_size += LZMA_STREAM_HEADER_SIZE;
+
+	return stream_size;
+}
+
+
+/// Calculate the size of the file up to the end of a Stream and its
+/// Stream Padding. Returns LZMA_VLI_UNKNOWN if the size would be bigger
+/// than LZMA_VLI_MAX.
+static lzma_vli
+index_file_size(lzma_vli compressed_base, lzma_vli unpadded_sum,
+		lzma_vli record_count, lzma_vli index_list_size,
+		lzma_vli stream_padding)
+{
+	// Earlier Streams and Stream Paddings + Stream Header
+	// + Blocks + Stream Footer + Stream Padding. The Index is added
+	// separately below.
+	//
+	// When called from lzma_index_append(), unpadded_sum may be big
+	// enough to make this go over LZMA_VLI_MAX.
+	const lzma_vli size_without_index = compressed_base
+			+ 2 * LZMA_STREAM_HEADER_SIZE
+			+ stream_padding + vli_ceil4(unpadded_sum);
+	if (size_without_index > LZMA_VLI_MAX)
+		return LZMA_VLI_UNKNOWN;
+
+	// Adding the Index may go over the limit as well.
+	const lzma_vli size_with_index = size_without_index
+			+ index_size(record_count, index_list_size);
+
+	return size_with_index > LZMA_VLI_MAX
+			? LZMA_VLI_UNKNOWN : size_with_index;
+}
+
+
+/// Return the size of the file described by the index, calculated with
+/// index_file_size() from the last Stream of the index.
+extern LZMA_API(lzma_vli)
+lzma_index_file_size(const lzma_index *i)
+{
+	const index_stream *const last_stream
+			= (const index_stream *)(i->streams.rightmost);
+	const index_group *const last_group
+			= (const index_group *)(last_stream->groups.rightmost);
+
+	// A Stream without any Record groups has no Blocks.
+	lzma_vli unpadded_sum = 0;
+	if (last_group != NULL)
+		unpadded_sum = last_group->records[last_group->last]
+				.unpadded_sum;
+
+	return index_file_size(last_stream->node.compressed_base,
+			unpadded_sum,
+			last_stream->record_count,
+			last_stream->index_list_size,
+			last_stream->stream_padding);
+}
+
+
+/// Return the uncompressed size of all the Blocks in the Stream(s).
+extern LZMA_API(lzma_vli)
+lzma_index_uncompressed_size(const lzma_index *i)
+{
+	const lzma_vli uncompressed = i->uncompressed_size;
+	return uncompressed;
+}
+
+
+/// Return a bitmask of the integrity check types used in the index:
+/// i->checks plus the bit of the last Stream if its Stream Flags
+/// are known.
+extern LZMA_API(uint32_t)
+lzma_index_checks(const lzma_index *i)
+{
+	const index_stream *const last_stream
+			= (const index_stream *)(i->streams.rightmost);
+
+	// UINT32_MAX as the version means that the Stream Flags of
+	// the last Stream are unknown and there is no bit to add.
+	if (last_stream->stream_flags.version == UINT32_MAX)
+		return i->checks;
+
+	return i->checks
+			| (UINT32_C(1) << last_stream->stream_flags.check);
+}
+
+
+/// Return how many bytes (0-3) are needed to round the unpadded size of
+/// the Index up to a multiple of four.
+extern uint32_t
+lzma_index_padding_size(const lzma_index *i)
+{
+	const lzma_vli unpadded = index_size_unpadded(
+			i->record_count, i->index_list_size);
+
+	return (LZMA_VLI_C(4) - unpadded) & 3;
+}
+
+
+/// Set the Stream Flags of the last Stream of the index.
+///
+/// \return     LZMA_PROG_ERROR if either argument is NULL, an error from
+///             lzma_stream_flags_compare() if the Stream Flags don't
+///             pass its validation, or LZMA_OK.
+extern LZMA_API(lzma_ret)
+lzma_index_stream_flags(lzma_index *i, const lzma_stream_flags *stream_flags)
+{
+	if (i == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (stream_flags == NULL)
+		return LZMA_PROG_ERROR;
+
+	// Comparing the Stream Flags against themselves validates them.
+	return_if_error(lzma_stream_flags_compare(
+			stream_flags, stream_flags));
+
+	index_stream *last_stream = (index_stream *)(i->streams.rightmost);
+	last_stream->stream_flags = *stream_flags;
+
+	return LZMA_OK;
+}
+
+
+/// Set the amount of Stream Padding after the last Stream of the index.
+///
+/// \return     LZMA_PROG_ERROR if i is NULL or stream_padding is bigger
+///             than LZMA_VLI_MAX or not a multiple of four,
+///             LZMA_DATA_ERROR if the file would grow too big (the old
+///             value is kept then), or LZMA_OK.
+extern LZMA_API(lzma_ret)
+lzma_index_stream_padding(lzma_index *i, lzma_vli stream_padding)
+{
+	if (i == NULL || stream_padding > LZMA_VLI_MAX
+			|| (stream_padding & 3) != 0)
+		return LZMA_PROG_ERROR;
+
+	index_stream *last_stream = (index_stream *)(i->streams.rightmost);
+
+	// Get the file size without any Stream Padding after the last
+	// Stream. For that the padding has to be zero for a moment.
+	const lzma_vli saved_padding = last_stream->stream_padding;
+	last_stream->stream_padding = 0;
+	const lzma_vli unpadded_file_size = lzma_index_file_size(i);
+
+	// Reject the new value if it would make the file grow too big.
+	if (unpadded_file_size + stream_padding > LZMA_VLI_MAX) {
+		last_stream->stream_padding = saved_padding;
+		return LZMA_DATA_ERROR;
+	}
+
+	last_stream->stream_padding = stream_padding;
+	return LZMA_OK;
+}
+
+
+/// Add a new Record to the last Stream of the index.
+///
+/// \return     LZMA_PROG_ERROR if the arguments are invalid,
+///             LZMA_DATA_ERROR if adding the Record would make some
+///             size exceed its limit, LZMA_MEM_ERROR if allocating
+///             a new Record group fails, or LZMA_OK. The index is
+///             modified only when LZMA_OK is returned.
+extern LZMA_API(lzma_ret)
+lzma_index_append(lzma_index *i, const lzma_allocator *allocator,
+ lzma_vli unpadded_size, lzma_vli uncompressed_size)
+{
+ // Validate.
+ if (i == NULL || unpadded_size < UNPADDED_SIZE_MIN
+ || unpadded_size > UNPADDED_SIZE_MAX
+ || uncompressed_size > LZMA_VLI_MAX)
+ return LZMA_PROG_ERROR;
+
+ // The new Record goes to the last Stream. g is NULL if that
+ // Stream doesn't have any Records yet.
+ index_stream *s = (index_stream *)(i->streams.rightmost);
+ index_group *g = (index_group *)(s->groups.rightmost);
+
+ // Start offsets of the new Block relative to the beginning of
+ // the Stream, calculated from the last existing Record.
+ const lzma_vli compressed_base = g == NULL ? 0
+ : vli_ceil4(g->records[g->last].unpadded_sum);
+ const lzma_vli uncompressed_base = g == NULL ? 0
+ : g->records[g->last].uncompressed_sum;
+ // How much the List of Records grows: the encoded sizes of
+ // the two VLIs of the new Record.
+ const uint32_t index_list_size_add = lzma_vli_size(unpadded_size)
+ + lzma_vli_size(uncompressed_size);
+
+ // Check that uncompressed size will not overflow.
+ if (uncompressed_base + uncompressed_size > LZMA_VLI_MAX)
+ return LZMA_DATA_ERROR;
+
+ // Check that the new unpadded sum will not overflow. This is
+ // checked again in index_file_size(), but the unpadded sum is
+ // passed to vli_ceil4() which expects a valid lzma_vli value.
+ if (compressed_base + unpadded_size > UNPADDED_SIZE_MAX)
+ return LZMA_DATA_ERROR;
+
+ // Check that the file size will stay within limits.
+ if (index_file_size(s->node.compressed_base,
+ compressed_base + unpadded_size, s->record_count + 1,
+ s->index_list_size + index_list_size_add,
+ s->stream_padding) == LZMA_VLI_UNKNOWN)
+ return LZMA_DATA_ERROR;
+
+ // The size of the Index field must not exceed the maximum value
+ // that can be stored in the Backward Size field.
+ if (index_size(i->record_count + 1,
+ i->index_list_size + index_list_size_add)
+ > LZMA_BACKWARD_SIZE_MAX)
+ return LZMA_DATA_ERROR;
+
+ if (g != NULL && g->last + 1 < g->allocated) {
+ // There is space in the last group at least for one Record.
+ ++g->last;
+ } else {
+ // We need to allocate a new group.
+ g = lzma_alloc(sizeof(index_group)
+ + i->prealloc * sizeof(index_record),
+ allocator);
+ if (g == NULL)
+ return LZMA_MEM_ERROR;
+
+ g->last = 0;
+ g->allocated = i->prealloc;
+
+ // Reset prealloc so that if the application happens to
+ // add new Records, the allocation size will be sane.
+ i->prealloc = INDEX_GROUP_SIZE;
+
+ // Set the start offsets of this group.
+ g->node.uncompressed_base = uncompressed_base;
+ g->node.compressed_base = compressed_base;
+ g->number_base = s->record_count + 1;
+
+ // Add the new group to the Stream.
+ index_tree_append(&s->groups, &g->node);
+ }
+
+ // Add the new Record to the group.
+ g->records[g->last].uncompressed_sum
+ = uncompressed_base + uncompressed_size;
+ g->records[g->last].unpadded_sum
+ = compressed_base + unpadded_size;
+
+ // Update the totals.
+ ++s->record_count;
+ s->index_list_size += index_list_size_add;
+
+ i->total_size += vli_ceil4(unpadded_size);
+ i->uncompressed_size += uncompressed_size;
+ ++i->record_count;
+ i->index_list_size += index_list_size_add;
+
+ return LZMA_OK;
+}
+
+
+/// Structure to pass info to index_cat_helper()
+typedef struct {
+ /// Uncompressed size of the destination
+ lzma_vli uncompressed_size;
+
+ /// Compressed file size of the destination
+ lzma_vli file_size;
+
+ /// Number of Blocks that were in the destination index before we
+ /// started appending new Streams from the source index. This is
+ /// used to fix the Block numbering.
+ lzma_vli block_number_add;
+
+ /// Number of Streams that were in the destination index before we
+ /// started appending new Streams from the source index. This is
+ /// used to fix the Stream numbering.
+ uint32_t stream_number_add;
+
+ /// Destination index' Stream tree
+ index_tree *streams;
+
+} index_cat_info;
+
+
+/// Move the Stream nodes of the source index to the destination tree in
+/// order (left subtree, this node, right subtree) using recursion.
+/// The simplest iterative traversal of the source tree wouldn't work
+/// because the pointers in the nodes get updated when the nodes are
+/// moved to the destination tree.
+static void
+index_cat_helper(const index_cat_info *info, index_stream *this)
+{
+	// Read both child pointers before index_tree_append() below
+	// overwrites the links of this node.
+	index_stream *const left_child = (index_stream *)(this->node.left);
+	index_stream *const right_child = (index_stream *)(this->node.right);
+
+	if (left_child != NULL)
+		index_cat_helper(info, left_child);
+
+	// Make the offsets and the numbering continue from where
+	// the destination index ended.
+	index_tree_node *const node = &this->node;
+	node->uncompressed_base += info->uncompressed_size;
+	node->compressed_base += info->file_size;
+	this->number += info->stream_number_add;
+	this->block_number_base += info->block_number_add;
+
+	index_tree_append(info->streams, node);
+
+	if (right_child != NULL)
+		index_cat_helper(info, right_child);
+}
+
+
+/// Concatenate two indexes: move all the Streams of src to the end of
+/// dest. On success the base structure of src is freed, so src must not
+/// be used after that.
+///
+/// \return     LZMA_PROG_ERROR if dest or src is NULL, LZMA_DATA_ERROR
+///             if the combined sizes would exceed the limits,
+///             LZMA_MEM_ERROR if memory allocation fails, or LZMA_OK.
+///             All the error returns happen before src is modified
+///             or freed.
+extern LZMA_API(lzma_ret)
+lzma_index_cat(lzma_index *restrict dest, lzma_index *restrict src,
+ const lzma_allocator *allocator)
+{
+ if (dest == NULL || src == NULL)
+ return LZMA_PROG_ERROR;
+
+ const lzma_vli dest_file_size = lzma_index_file_size(dest);
+
+ // Check that we don't exceed the file size limits.
+ if (dest_file_size + lzma_index_file_size(src) > LZMA_VLI_MAX
+ || dest->uncompressed_size + src->uncompressed_size
+ > LZMA_VLI_MAX)
+ return LZMA_DATA_ERROR;
+
+ // Check that the encoded size of the combined lzma_indexes stays
+ // within limits. In theory, this should be done only if we know
+ // that the user plans to actually combine the Streams and thus
+ // construct a single Index (probably rare). However, exceeding
+ // this limit is quite theoretical, so we do this check always
+ // to simplify things elsewhere.
+ {
+ const lzma_vli dest_size = index_size_unpadded(
+ dest->record_count, dest->index_list_size);
+ const lzma_vli src_size = index_size_unpadded(
+ src->record_count, src->index_list_size);
+ if (vli_ceil4(dest_size + src_size) > LZMA_BACKWARD_SIZE_MAX)
+ return LZMA_DATA_ERROR;
+ }
+
+ // Optimize the last group to minimize memory usage. Allocation has
+ // to be done before modifying dest or src.
+ {
+ index_stream *s = (index_stream *)(dest->streams.rightmost);
+ index_group *g = (index_group *)(s->groups.rightmost);
+ if (g != NULL && g->last + 1 < g->allocated) {
+ assert(g->node.left == NULL);
+ assert(g->node.right == NULL);
+
+ // Allocate a group that has room for exactly
+ // the Records that are in use.
+ index_group *newg = lzma_alloc(sizeof(index_group)
+ + (g->last + 1)
+ * sizeof(index_record),
+ allocator);
+ if (newg == NULL)
+ return LZMA_MEM_ERROR;
+
+ newg->node = g->node;
+ newg->allocated = g->last + 1;
+ newg->last = g->last;
+ newg->number_base = g->number_base;
+
+ memcpy(newg->records, g->records, newg->allocated
+ * sizeof(index_record));
+
+ // Make every pointer that pointed to the old group
+ // point to the new one: the parent node, ...
+ if (g->node.parent != NULL) {
+ assert(g->node.parent->right == &g->node);
+ g->node.parent->right = &newg->node;
+ }
+
+ // ... the leftmost and root pointers of the tree if
+ // the group is the only one in the tree, ...
+ if (s->groups.leftmost == &g->node) {
+ assert(s->groups.root == &g->node);
+ s->groups.leftmost = &newg->node;
+ s->groups.root = &newg->node;
+ }
+
+ // ... and the rightmost pointer of the tree.
+ assert(s->groups.rightmost == &g->node);
+ s->groups.rightmost = &newg->node;
+
+ lzma_free(g, allocator);
+
+ // NOTE: newg isn't leaked here because
+ // newg == (void *)&newg->node.
+ }
+ }
+
+ // dest->checks includes the check types of all except the last Stream
+ // in dest. Set the bit for the check type of the last Stream now so
+ // that it won't get lost when Stream(s) from src are appended to dest.
+ dest->checks = lzma_index_checks(dest);
+
+ // Add all the Streams from src to dest. Update the base offsets
+ // of each Stream from src.
+ const index_cat_info info = {
+ .uncompressed_size = dest->uncompressed_size,
+ .file_size = dest_file_size,
+ .stream_number_add = dest->streams.count,
+ .block_number_add = dest->record_count,
+ .streams = &dest->streams,
+ };
+ index_cat_helper(&info, (index_stream *)(src->streams.root));
+
+ // Update info about all the combined Streams.
+ dest->uncompressed_size += src->uncompressed_size;
+ dest->total_size += src->total_size;
+ dest->record_count += src->record_count;
+ dest->index_list_size += src->index_list_size;
+ dest->checks |= src->checks;
+
+ // There's nothing else left in src than the base structure.
+ lzma_free(src, allocator);
+
+ return LZMA_OK;
+}
+
+
+/// Duplicate an index_stream.
+///
+/// All the Records of the source Stream are put into a single Record
+/// group in the copy. Returns NULL if memory allocation fails or if
+/// the Stream has more Records than can be allocated at once.
+static index_stream *
+index_dup_stream(const index_stream *src, const lzma_allocator *allocator)
+{
+ // Catch a somewhat theoretical integer overflow.
+ if (src->record_count > PREALLOC_MAX)
+ return NULL;
+
+ // Allocate and initialize a new Stream.
+ index_stream *dest = index_stream_init(src->node.compressed_base,
+ src->node.uncompressed_base, src->number,
+ src->block_number_base, allocator);
+ if (dest == NULL)
+ return NULL;
+
+ // Copy the overall information.
+ dest->record_count = src->record_count;
+ dest->index_list_size = src->index_list_size;
+ dest->stream_flags = src->stream_flags;
+ dest->stream_padding = src->stream_padding;
+
+ // Return if there are no groups to duplicate.
+ if (src->groups.leftmost == NULL)
+ return dest;
+
+ // Allocate memory for the Records. We put all the Records into
+ // a single group. It's simplest and also tends to make
+ // lzma_index_locate() a little bit faster with very big Indexes.
+ index_group *destg = lzma_alloc(sizeof(index_group)
+ + src->record_count * sizeof(index_record),
+ allocator);
+ if (destg == NULL) {
+ index_stream_end(dest, allocator);
+ return NULL;
+ }
+
+ // Initialize destg. Being the only group of the Stream, it starts
+ // at offset zero and its first Record is Block number 1.
+ destg->node.uncompressed_base = 0;
+ destg->node.compressed_base = 0;
+ destg->number_base = 1;
+ destg->allocated = src->record_count;
+ destg->last = src->record_count - 1;
+
+ // Go through all the groups in src and copy the Records into destg.
+ // i is the number of Records copied so far.
+ const index_group *srcg = (const index_group *)(src->groups.leftmost);
+ size_t i = 0;
+ do {
+ memcpy(destg->records + i, srcg->records,
+ (srcg->last + 1) * sizeof(index_record));
+ i += srcg->last + 1;
+ srcg = index_tree_next(&srcg->node);
+ } while (srcg != NULL);
+
+ assert(i == destg->allocated);
+
+ // Add the group to the new Stream.
+ index_tree_append(&dest->groups, &destg->node);
+
+ return dest;
+}
+
+
+/// Create a deep copy of src.
+///
+/// Every Stream of src is duplicated with index_dup_stream() and the
+/// totals kept in the base structure are copied. Returns the new
+/// lzma_index, or NULL if any allocation fails (in which case everything
+/// allocated so far is freed).
+extern LZMA_API(lzma_index *)
+lzma_index_dup(const lzma_index *src, const lzma_allocator *allocator)
+{
+	// Allocate the base structure (no initial Stream).
+	lzma_index *dest = index_init_plain(allocator);
+	if (dest == NULL)
+		return NULL;
+
+	// Copy the totals.
+	dest->uncompressed_size = src->uncompressed_size;
+	dest->total_size = src->total_size;
+	dest->record_count = src->record_count;
+	dest->index_list_size = src->index_list_size;
+
+	// Copy the bitmask of Check IDs too. lzma_index_cat() accumulates
+	// it in the base structure (dest->checks |= src->checks), so it
+	// cannot be reconstructed from the duplicated Streams alone and
+	// would otherwise be missing from the copy.
+	dest->checks = src->checks;
+
+	// Copy the Streams and the groups in them. The loop assumes that
+	// src has at least one Stream.
+	const index_stream *srcstream
+			= (const index_stream *)(src->streams.leftmost);
+	do {
+		index_stream *deststream = index_dup_stream(
+				srcstream, allocator);
+		if (deststream == NULL) {
+			lzma_index_end(dest, allocator);
+			return NULL;
+		}
+
+		index_tree_append(&dest->streams, &deststream->node);
+
+		srcstream = index_tree_next(&srcstream->node);
+	} while (srcstream != NULL);
+
+	return dest;
+}
+
+
+/// Indexing for lzma_index_iter.internal[]
+enum {
+ ITER_INDEX, // .p: the lzma_index being iterated
+ ITER_STREAM, // .p: current index_stream; NULL before the first Stream
+ ITER_GROUP, // .p: group pointer; how to interpret it depends on ITER_METHOD
+ ITER_RECORD, // .s: number of the current Record inside the current group
+ ITER_METHOD, // .s: one of the ITER_METHOD_* values
+};
+
+
+/// Values for lzma_index_iter.internal[ITER_METHOD].s
+enum {
+ ITER_METHOD_NORMAL, // ITER_GROUP holds the current group itself
+ ITER_METHOD_NEXT, // ITER_GROUP holds the parent; the current group is index_tree_next() of it
+ ITER_METHOD_LEFTMOST, // ITER_GROUP is NULL; the current group is stream->groups.leftmost (may be NULL)
+};
+
+/**
+ * Publish the iterator's internal position to its public fields.
+ *
+ * Reads ITER_INDEX, ITER_STREAM, ITER_GROUP and ITER_RECORD from
+ * iter->internal[], rewrites ITER_GROUP/ITER_METHOD so that no pointer to
+ * the last group of the index is stored, and then fills in iter->stream.
+ * iter->block is filled in only when there is a current group.
+ */
+static void
+iter_set_info(lzma_index_iter *iter)
+{
+ const lzma_index *i = iter->internal[ITER_INDEX].p;
+ const index_stream *stream = iter->internal[ITER_STREAM].p;
+ const index_group *group = iter->internal[ITER_GROUP].p;
+ const size_t record = iter->internal[ITER_RECORD].s;
+
+ // lzma_index_iter.internal must not contain a pointer to the last
+ // group in the index, because that may be reallocated by
+ // lzma_index_cat().
+ if (group == NULL) {
+ // There are no groups.
+ assert(stream->groups.root == NULL);
+ iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST;
+
+ } else if (i->streams.rightmost != &stream->node
+ || stream->groups.rightmost != &group->node) {
+ // The group is not the last group in the index.
+ iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL;
+
+ } else if (stream->groups.leftmost != &group->node) {
+ // The group isn't the only group in the Stream, thus we
+ // know that it must have a parent group i.e. it's not
+ // the root node.
+ assert(stream->groups.root != &group->node);
+ assert(group->node.parent->right == &group->node);
+ iter->internal[ITER_METHOD].s = ITER_METHOD_NEXT;
+ iter->internal[ITER_GROUP].p = group->node.parent; // lzma_index_iter_next() gets the group back with index_tree_next()
+
+ } else {
+ // The Stream has only one group.
+ assert(stream->groups.root == &group->node);
+ assert(group->node.parent == NULL);
+ iter->internal[ITER_METHOD].s = ITER_METHOD_LEFTMOST;
+ iter->internal[ITER_GROUP].p = NULL; // lzma_index_iter_next() gets the group back from stream->groups.leftmost
+ }
+
+ // NOTE: lzma_index_iter.stream.number is lzma_vli but we use uint32_t
+ // internally.
+ iter->stream.number = stream->number;
+ iter->stream.block_count = stream->record_count;
+ iter->stream.compressed_offset = stream->node.compressed_base;
+ iter->stream.uncompressed_offset = stream->node.uncompressed_base;
+
+ // iter->stream.flags will be NULL if the Stream Flags haven't been
+ // set with lzma_index_stream_flags().
+ iter->stream.flags = stream->stream_flags.version == UINT32_MAX
+ ? NULL : &stream->stream_flags;
+ iter->stream.padding = stream->stream_padding;
+
+ if (stream->groups.rightmost == NULL) {
+ // Stream has no Blocks.
+ iter->stream.compressed_size = index_size(0, 0)
+ + 2 * LZMA_STREAM_HEADER_SIZE;
+ iter->stream.uncompressed_size = 0;
+ } else {
+ const index_group *g = (const index_group *)(
+ stream->groups.rightmost); // The last Record of the last group holds the Stream's totals.
+
+ // Stream Header + Stream Footer + Index + Blocks
+ iter->stream.compressed_size = 2 * LZMA_STREAM_HEADER_SIZE
+ + index_size(stream->record_count,
+ stream->index_list_size)
+ + vli_ceil4(g->records[g->last].unpadded_sum);
+ iter->stream.uncompressed_size
+ = g->records[g->last].uncompressed_sum;
+ }
+
+ if (group != NULL) {
+ iter->block.number_in_stream = group->number_base + record;
+ iter->block.number_in_file = iter->block.number_in_stream
+ + stream->block_number_base;
+
+ iter->block.compressed_stream_offset
+ = record == 0 ? group->node.compressed_base
+ : vli_ceil4(group->records[
+ record - 1].unpadded_sum); // A Block starts where the previous (padded) Block ends.
+ iter->block.uncompressed_stream_offset
+ = record == 0 ? group->node.uncompressed_base
+ : group->records[record - 1].uncompressed_sum;
+
+ iter->block.uncompressed_size
+ = group->records[record].uncompressed_sum
+ - iter->block.uncompressed_stream_offset; // The Records hold running sums; the difference is this Block's size.
+ iter->block.unpadded_size
+ = group->records[record].unpadded_sum
+ - iter->block.compressed_stream_offset;
+ iter->block.total_size = vli_ceil4(iter->block.unpadded_size);
+
+ iter->block.compressed_stream_offset
+ += LZMA_STREAM_HEADER_SIZE; // Done only after the size calculation above, which needs the unadjusted offset.
+
+ iter->block.compressed_file_offset
+ = iter->block.compressed_stream_offset
+ + iter->stream.compressed_offset;
+ iter->block.uncompressed_file_offset
+ = iter->block.uncompressed_stream_offset
+ + iter->stream.uncompressed_offset;
+ }
+
+ return;
+}
+
+
+/// Attach the iterator to the lzma_index i and put it into the rewound
+/// state, i.e. positioned before the first Stream.
+extern LZMA_API(void)
+lzma_index_iter_init(lzma_index_iter *iter, const lzma_index *i)
+{
+	// Only the Index pointer is stored here. The rest of the internal
+	// state is reset by lzma_index_iter_rewind().
+	iter->internal[ITER_INDEX].p = i;
+	lzma_index_iter_rewind(iter);
+}
+
+
+/// Move the iterator back to the position before the first Stream.
+/// The lzma_index that the iterator is attached to is left unchanged.
+extern LZMA_API(void)
+lzma_index_iter_rewind(lzma_index_iter *iter)
+{
+	// A NULL Stream pointer is what lzma_index_iter_next() treats as
+	// "at the beginning of the lzma_index".
+	iter->internal[ITER_METHOD].s = ITER_METHOD_NORMAL;
+	iter->internal[ITER_RECORD].s = 0;
+	iter->internal[ITER_GROUP].p = NULL;
+	iter->internal[ITER_STREAM].p = NULL;
+}
+
+/**
+ * Advance the iterator to the next Stream or Block, as selected by mode.
+ *
+ * Returns false when the iterator was moved; the new position is stored
+ * in iter->internal[] and published with iter_set_info(). Returns true
+ * when mode is not a supported value or when there is nothing more to
+ * return; in that case iter is not modified.
+ */
+extern LZMA_API(lzma_bool)
+lzma_index_iter_next(lzma_index_iter *iter, lzma_index_iter_mode mode)
+{
+ // Catch unsupported mode values.
+ if ((unsigned int)(mode) > LZMA_INDEX_ITER_NONEMPTY_BLOCK)
+ return true;
+
+ const lzma_index *i = iter->internal[ITER_INDEX].p;
+ const index_stream *stream = iter->internal[ITER_STREAM].p;
+ const index_group *group = NULL;
+ size_t record = iter->internal[ITER_RECORD].s;
+
+ // If we are being asked for the next Stream, leave group to NULL
+ // so that the rest of this function thinks that this Stream
+ // has no groups and will thus go to the next Stream.
+ if (mode != LZMA_INDEX_ITER_STREAM) {
+ // Get the pointer to the current group. See iter_set_info()
+ // for explanation.
+ switch (iter->internal[ITER_METHOD].s) {
+ case ITER_METHOD_NORMAL:
+ group = iter->internal[ITER_GROUP].p;
+ break;
+
+ case ITER_METHOD_NEXT:
+ group = index_tree_next(iter->internal[ITER_GROUP].p);
+ break;
+
+ case ITER_METHOD_LEFTMOST:
+ group = (const index_group *)(
+ stream->groups.leftmost);
+ break;
+ }
+ }
+
+again: // Re-entered from below when LZMA_INDEX_ITER_NONEMPTY_BLOCK lands on an empty Block.
+ if (stream == NULL) {
+ // We are at the beginning of the lzma_index.
+ // Locate the first Stream.
+ stream = (const index_stream *)(i->streams.leftmost);
+ if (mode >= LZMA_INDEX_ITER_BLOCK) {
+ // Since we are being asked to return information
+ // about the first Block, skip Streams that have
+ // no Blocks.
+ while (stream->groups.leftmost == NULL) {
+ stream = index_tree_next(&stream->node);
+ if (stream == NULL)
+ return true;
+ }
+ }
+
+ // Start from the first Record in the Stream.
+ group = (const index_group *)(stream->groups.leftmost);
+ record = 0;
+
+ } else if (group != NULL && record < group->last) {
+ // The next Record is in the same group.
+ ++record;
+
+ } else {
+ // This group has no more Records or this Stream has
+ // no Blocks at all.
+ record = 0;
+
+ // If group is not NULL, this Stream has at least one Block
+ // and thus at least one group. Find the next group.
+ if (group != NULL)
+ group = index_tree_next(&group->node);
+
+ if (group == NULL) {
+ // This Stream has no more Records. Find the next
+ // Stream. If we are being asked to return information
+ // about a Block, we skip empty Streams.
+ do {
+ stream = index_tree_next(&stream->node);
+ if (stream == NULL)
+ return true;
+ } while (mode >= LZMA_INDEX_ITER_BLOCK
+ && stream->groups.leftmost == NULL);
+
+ group = (const index_group *)(
+ stream->groups.leftmost);
+ }
+ }
+
+ if (mode == LZMA_INDEX_ITER_NONEMPTY_BLOCK) {
+ // We need to look for the next Block again if this Block
+ // is empty, that is, if its uncompressed sum equals that of
+ // the previous Record (or the group's base for Record 0).
+ if (record == 0) {
+ if (group->node.uncompressed_base
+ == group->records[0].uncompressed_sum)
+ goto again;
+ } else if (group->records[record - 1].uncompressed_sum
+ == group->records[record].uncompressed_sum) {
+ goto again;
+ }
+ }
+
+ iter->internal[ITER_STREAM].p = stream;
+ iter->internal[ITER_GROUP].p = group;
+ iter->internal[ITER_RECORD].s = record;
+
+ iter_set_info(iter);
+
+ return false;
+}
+
+
+/// Position the iterator at the Block that contains the uncompressed
+/// offset target.
+///
+/// Returns false on success. Returns true without touching iter if target
+/// is at or past the end of the uncompressed data.
+extern LZMA_API(lzma_bool)
+lzma_index_iter_locate(lzma_index_iter *iter, lzma_vli target)
+{
+	const lzma_index *i = iter->internal[ITER_INDEX].p;
+
+	// Offsets at or beyond the total uncompressed size don't exist.
+	if (target >= i->uncompressed_size)
+		return true;
+
+	// Narrow the search down to a Stream first and make target
+	// relative to the beginning of that Stream.
+	const index_stream *stream = index_tree_locate(&i->streams, target);
+	assert(stream != NULL);
+	target -= stream->node.uncompressed_base;
+
+	// Then pick the group inside the Stream.
+	const index_group *group = index_tree_locate(&stream->groups, target);
+	assert(group != NULL);
+
+	// Binary search for the first Record whose uncompressed_sum is
+	// greater than target. Looking for the first such Record means
+	// that empty Blocks, which share the sum of their predecessor,
+	// are never returned.
+	size_t lo = 0;
+	size_t hi = group->last;
+
+	while (lo < hi) {
+		const size_t mid = lo + (hi - lo) / 2;
+		if (group->records[mid].uncompressed_sum > target)
+			hi = mid;
+		else
+			lo = mid + 1;
+	}
+
+	iter->internal[ITER_RECORD].s = lo;
+	iter->internal[ITER_GROUP].p = group;
+	iter->internal[ITER_STREAM].p = stream;
+
+	iter_set_info(iter);
+
+	return false;
+}
diff --git a/src/liblzma/common/index.h b/src/liblzma/common/index.h
new file mode 100644
index 0000000..7b27d70
--- /dev/null
+++ b/src/liblzma/common/index.h
@@ -0,0 +1,81 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file index.h
+/// \brief Handling of Index
+/// \note This header file does not include common.h or lzma.h because
+/// this file is needed by both liblzma internally and by the
+/// tests. Including common.h would pull in and define many things
+/// the tests do not need; omitting it also avoids issues with
+/// header file include order. This way, if lzma.h or common.h is
+/// not included before this file, the build breaks on every OS
+/// instead of causing more subtle errors.
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_INDEX_H
+#define LZMA_INDEX_H
+
+
+/// Minimum Unpadded Size; the Index decoder rejects smaller values
+#define UNPADDED_SIZE_MIN LZMA_VLI_C(5)
+
+/// Maximum Unpadded Size: LZMA_VLI_MAX with the two lowest bits cleared
+#define UNPADDED_SIZE_MAX (LZMA_VLI_MAX & ~LZMA_VLI_C(3))
+
+/// Value of the Index Indicator byte that begins the Index field (xz spec)
+#define INDEX_INDICATOR 0
+
+
+/// Get the size of the Index Padding field. This is needed by Index encoder
+/// and decoder, but applications should have no use for this.
+extern uint32_t lzma_index_padding_size(const lzma_index *i);
+
+
+/// Set for how many Records to allocate memory the next time
+/// lzma_index_append() needs to allocate space for a new Record.
+/// This is used only by the Index decoder.
+extern void lzma_index_prealloc(lzma_index *i, lzma_vli records);
+
+
+/// Round a variable-length integer up to the next multiple of four.
+/// The argument must not exceed UNPADDED_SIZE_MAX.
+static inline lzma_vli
+vli_ceil4(lzma_vli vli)
+{
+	assert(vli <= UNPADDED_SIZE_MAX);
+
+	// Adding three and clearing the two lowest bits rounds upwards.
+	const lzma_vli low_bits = LZMA_VLI_C(3);
+	return (vli + 3) & ~low_bits;
+}
+
+
+/// Size of the Index field without Index Padding: the sum of the sizes
+/// of Index Indicator, Number of Records, List of Records, and CRC32.
+static inline lzma_vli
+index_size_unpadded(lzma_vli count, lzma_vli index_list_size)
+{
+	// One byte of Index Indicator followed by the encoded Record count.
+	const lzma_vli before_list = 1 + lzma_vli_size(count);
+
+	// The List of Records is followed by four bytes of CRC32.
+	return before_list + index_list_size + 4;
+}
+
+
+/// Size of the whole Index field, i.e. the unpadded size rounded up to
+/// a multiple of four by the Index Padding.
+static inline lzma_vli
+index_size(lzma_vli count, lzma_vli index_list_size)
+{
+	const lzma_vli unpadded = index_size_unpadded(count, index_list_size);
+	return vli_ceil4(unpadded);
+}
+
+
+/// Total size of a Stream: Stream Header, Blocks, Index, and Stream
+/// Footer (the Footer is as big as the Header).
+static inline lzma_vli
+index_stream_size(lzma_vli blocks_size,
+		lzma_vli count, lzma_vli index_list_size)
+{
+	const lzma_vli index_field_size = index_size(count, index_list_size);
+
+	return LZMA_STREAM_HEADER_SIZE + blocks_size
+			+ index_field_size
+			+ LZMA_STREAM_HEADER_SIZE;
+}
+
+#endif
diff --git a/src/liblzma/common/index_decoder.c b/src/liblzma/common/index_decoder.c
new file mode 100644
index 0000000..19a31b3
--- /dev/null
+++ b/src/liblzma/common/index_decoder.c
@@ -0,0 +1,362 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file index_decoder.c
+/// \brief Decodes the Index field
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "index_decoder.h"
+#include "check.h"
+
+
+typedef struct {
+ enum {
+ SEQ_INDICATOR, // Expect the Index Indicator byte
+ SEQ_COUNT, // Decode Number of Records
+ SEQ_MEMUSAGE, // Check the Record count against memlimit
+ SEQ_UNPADDED, // Decode the Unpadded Size of a Record
+ SEQ_UNCOMPRESSED, // Decode the Uncompressed Size of a Record
+ SEQ_PADDING_INIT, // Calculate the size of Index Padding
+ SEQ_PADDING, // Verify the Index Padding bytes
+ SEQ_CRC32, // Verify the CRC32 field
+ } sequence;
+
+ /// Memory usage limit
+ uint64_t memlimit;
+
+ /// Target Index
+ lzma_index *index;
+
+ /// Pointer given by the application, which is set after
+ /// successful decoding.
+ lzma_index **index_ptr;
+
+ /// Number of Records left to decode.
+ lzma_vli count;
+
+ /// The most recent Unpadded Size field
+ lzma_vli unpadded_size;
+
+ /// The most recent Uncompressed Size field
+ lzma_vli uncompressed_size;
+
+ /// Position in integers; reused as padding bytes left and CRC32 byte index
+ size_t pos;
+
+ /// CRC32 of the Index bytes read so far (all fields before CRC32)
+ uint32_t crc32;
+} lzma_index_coder;
+
+/**
+ * Decode the Index field from in[], possibly over several calls.
+ *
+ * Input is consumed starting at *in_pos and *in_pos is advanced.
+ * Returns LZMA_OK when all available input was used and more is needed,
+ * LZMA_STREAM_END once the CRC32 has been verified and the decoded Index
+ * has been stored to *coder->index_ptr, or an error code. The out,
+ * out_pos, out_size, and action arguments are unused.
+ */
+static lzma_ret
+index_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size,
+ uint8_t *restrict out lzma_attribute((__unused__)),
+ size_t *restrict out_pos lzma_attribute((__unused__)),
+ size_t out_size lzma_attribute((__unused__)),
+ lzma_action action lzma_attribute((__unused__)))
+{
+ lzma_index_coder *coder = coder_ptr;
+
+ // The CRC32 is updated once per call, starting from in_start (as in index_encoder.c).
+ const size_t in_start = *in_pos;
+ lzma_ret ret = LZMA_OK;
+
+ while (*in_pos < in_size)
+ switch (coder->sequence) {
+ case SEQ_INDICATOR:
+ // Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
+ // LZMA_FORMAT_ERROR, because a typical use case for Index
+ // decoder is when parsing the Stream backwards. If seeking
+ // backward from the Stream Footer gives us something that
+ // doesn't begin with Index Indicator, the file is considered
+ // corrupt, not "programming error" or "unrecognized file
+ // format". One could argue that the application should
+ // verify the Index Indicator before trying to decode the
+ // Index, but well, I suppose it is simpler this way.
+ if (in[(*in_pos)++] != INDEX_INDICATOR)
+ return LZMA_DATA_ERROR;
+
+ coder->sequence = SEQ_COUNT;
+ break;
+
+ case SEQ_COUNT:
+ ret = lzma_vli_decode(&coder->count, &coder->pos,
+ in, in_pos, in_size);
+ if (ret != LZMA_STREAM_END)
+ goto out;
+
+ coder->pos = 0;
+ coder->sequence = SEQ_MEMUSAGE;
+
+ // Fall through
+
+ case SEQ_MEMUSAGE:
+ if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
+ ret = LZMA_MEMLIMIT_ERROR;
+ goto out;
+ }
+
+ // Tell the Index handling code how many Records this
+ // Index has to allow it to allocate memory more efficiently.
+ lzma_index_prealloc(coder->index, coder->count);
+
+ ret = LZMA_OK; // ret is still LZMA_STREAM_END if we fell through from SEQ_COUNT.
+ coder->sequence = coder->count == 0
+ ? SEQ_PADDING_INIT : SEQ_UNPADDED;
+ break;
+
+ case SEQ_UNPADDED:
+ case SEQ_UNCOMPRESSED: {
+ lzma_vli *size = coder->sequence == SEQ_UNPADDED
+ ? &coder->unpadded_size
+ : &coder->uncompressed_size;
+
+ ret = lzma_vli_decode(size, &coder->pos,
+ in, in_pos, in_size);
+ if (ret != LZMA_STREAM_END)
+ goto out;
+
+ ret = LZMA_OK;
+ coder->pos = 0;
+
+ if (coder->sequence == SEQ_UNPADDED) {
+ // Validate that encoded Unpadded Size isn't too small
+ // or too big.
+ if (coder->unpadded_size < UNPADDED_SIZE_MIN
+ || coder->unpadded_size
+ > UNPADDED_SIZE_MAX)
+ return LZMA_DATA_ERROR;
+
+ coder->sequence = SEQ_UNCOMPRESSED;
+ } else {
+ // Add the decoded Record to the Index.
+ return_if_error(lzma_index_append(
+ coder->index, allocator,
+ coder->unpadded_size,
+ coder->uncompressed_size));
+
+ // Check if this was the last Record.
+ coder->sequence = --coder->count == 0
+ ? SEQ_PADDING_INIT
+ : SEQ_UNPADDED;
+ }
+
+ break;
+ }
+
+ case SEQ_PADDING_INIT:
+ coder->pos = lzma_index_padding_size(coder->index); // pos now counts the padding bytes left.
+ coder->sequence = SEQ_PADDING;
+
+ // Fall through
+
+ case SEQ_PADDING:
+ if (coder->pos > 0) {
+ --coder->pos;
+ if (in[(*in_pos)++] != 0x00)
+ return LZMA_DATA_ERROR;
+
+ break;
+ }
+
+ // Finish the CRC32 calculation.
+ coder->crc32 = lzma_crc32(in + in_start,
+ *in_pos - in_start, coder->crc32);
+
+ coder->sequence = SEQ_CRC32;
+
+ // Fall through
+
+ case SEQ_CRC32:
+ do { // Compare the CRC32 one byte at a time, least significant byte first.
+ if (*in_pos == in_size)
+ return LZMA_OK;
+
+ if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
+ != in[(*in_pos)++]) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ return LZMA_DATA_ERROR;
+#endif
+ }
+
+ } while (++coder->pos < 4);
+
+ // Decoding was successful, now we can let the application
+ // see the decoded Index.
+ *coder->index_ptr = coder->index;
+
+ // Make index NULL so we don't free it unintentionally.
+ coder->index = NULL;
+
+ return LZMA_STREAM_END;
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+out:
+ // Update the CRC32.
+ //
+ // Avoid null pointer + 0 (undefined behavior) in "in + in_start".
+ // In such a case we had no input and thus in_used == 0.
+ {
+ const size_t in_used = *in_pos - in_start;
+ if (in_used > 0)
+ coder->crc32 = lzma_crc32(in + in_start,
+ in_used, coder->crc32);
+ }
+
+ return ret;
+}
+
+
+/// Free the Index decoder: first the lzma_index it still owns (if any),
+/// then the coder structure itself.
+static void
+index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+	lzma_index_coder *const coder = coder_ptr;
+
+	// coder->index is NULL after a successful decode because the Index
+	// has then been handed over to the application.
+	lzma_index_end(coder->index, allocator);
+	lzma_free(coder, allocator);
+}
+
+
+/// Report the memory usage and the current memory usage limit, and
+/// optionally set a new limit.
+///
+/// A new_memlimit of zero only queries the values. A non-zero limit that
+/// is smaller than the reported usage is rejected with
+/// LZMA_MEMLIMIT_ERROR and the old limit is kept.
+static lzma_ret
+index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
+		uint64_t *old_memlimit, uint64_t new_memlimit)
+{
+	lzma_index_coder *const coder = coder_ptr;
+
+	*old_memlimit = coder->memlimit;
+	*memusage = lzma_index_memusage(1, coder->count);
+
+	// Nothing more to do if the caller doesn't want to change the limit.
+	if (new_memlimit == 0)
+		return LZMA_OK;
+
+	if (new_memlimit < *memusage)
+		return LZMA_MEMLIMIT_ERROR;
+
+	coder->memlimit = new_memlimit;
+	return LZMA_OK;
+}
+
+
+/// Prepare coder for decoding a new Index.
+///
+/// *i is set to NULL right away and will point to the decoded Index only
+/// after decoding has succeeded. Returns LZMA_MEM_ERROR if the new
+/// lzma_index cannot be allocated, otherwise LZMA_OK.
+static lzma_ret
+index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
+		lzma_index **i, uint64_t memlimit)
+{
+	// Keep the application's pointer NULL until we have a complete
+	// Index to give. This way the application can always pass it to
+	// lzma_index_end() no matter if decoding succeeds or not.
+	*i = NULL;
+	coder->index_ptr = i;
+
+	// Every decoding run fills a freshly allocated lzma_index.
+	lzma_index *fresh = lzma_index_init(allocator);
+	coder->index = fresh;
+	if (fresh == NULL)
+		return LZMA_MEM_ERROR;
+
+	// The remaining state. count must be initialized already here
+	// because _memconfig() may read it.
+	coder->crc32 = 0;
+	coder->pos = 0;
+	coder->count = 0;
+	coder->memlimit = my_max(1, memlimit);
+	coder->sequence = SEQ_INDICATOR;
+
+	return LZMA_OK;
+}
+
+
+/// Initialize next as an Index decoder that stores its result to *i.
+///
+/// Returns LZMA_PROG_ERROR if i is NULL and LZMA_MEM_ERROR if memory
+/// allocation fails. An already allocated coder is reused.
+extern lzma_ret
+lzma_index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+		lzma_index **i, uint64_t memlimit)
+{
+	lzma_next_coder_init(&lzma_index_decoder_init, next, allocator);
+
+	if (i == NULL)
+		return LZMA_PROG_ERROR;
+
+	lzma_index_coder *coder = next->coder;
+	if (coder != NULL) {
+		// Reusing an old coder: get rid of the Index that it may
+		// still own from an earlier, unfinished decoding run.
+		lzma_index_end(coder->index, allocator);
+	} else {
+		coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
+		if (coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		coder->index = NULL;
+		next->memconfig = &index_decoder_memconfig;
+		next->end = &index_decoder_end;
+		next->code = &index_decode;
+		next->coder = coder;
+	}
+
+	return index_decoder_reset(coder, allocator, i, memlimit);
+}
+
+
+/// Initialize strm as an Index decoder. The decoded Index is made
+/// available via *i once decoding has finished successfully.
+extern LZMA_API(lzma_ret)
+lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
+{
+	lzma_next_strm_init(lzma_index_decoder_init, strm, i, memlimit);
+
+	// The Index decoder accepts both LZMA_RUN and LZMA_FINISH.
+	strm->internal->supported_actions[LZMA_FINISH] = true;
+	strm->internal->supported_actions[LZMA_RUN] = true;
+
+	return LZMA_OK;
+}
+
+
+/// Decode a complete Index field from a buffer with a single call.
+///
+/// On success *i points to the decoded Index and *in_pos has been
+/// advanced. On failure *in_pos keeps its original value and *i is NULL.
+/// With LZMA_MEMLIMIT_ERROR, *memlimit is set to the amount of memory
+/// that would have been needed.
+extern LZMA_API(lzma_ret)
+lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
+		const lzma_allocator *allocator,
+		const uint8_t *in, size_t *in_pos, size_t in_size)
+{
+	// Sanity checks
+	if (i == NULL || memlimit == NULL
+			|| in == NULL || in_pos == NULL || *in_pos > in_size)
+		return LZMA_PROG_ERROR;
+
+	// The coder is small enough to live on the stack.
+	lzma_index_coder coder;
+	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
+
+	// Needed for restoring *in_pos if decoding fails.
+	const size_t in_start = *in_pos;
+
+	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
+			NULL, NULL, 0, LZMA_RUN);
+
+	if (ret == LZMA_STREAM_END)
+		return LZMA_OK;
+
+	// Decoding failed: free the Index structure and restore the input
+	// position.
+	lzma_index_end(coder.index, allocator);
+	*in_pos = in_start;
+
+	switch (ret) {
+	case LZMA_OK:
+		// The decoder wants more input, so the input is truncated
+		// or otherwise corrupt. Use LZMA_DATA_ERROR instead of
+		// LZMA_BUF_ERROR like lzma_vli_decode() does in
+		// single-call mode.
+		ret = LZMA_DATA_ERROR;
+		break;
+
+	case LZMA_MEMLIMIT_ERROR:
+		// Tell the caller how much memory would have been needed.
+		*memlimit = lzma_index_memusage(1, coder.count);
+		break;
+
+	default:
+		break;
+	}
+
+	return ret;
+}
diff --git a/src/liblzma/common/index_decoder.h b/src/liblzma/common/index_decoder.h
new file mode 100644
index 0000000..3fec483
--- /dev/null
+++ b/src/liblzma/common/index_decoder.h
@@ -0,0 +1,25 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file index_decoder.h
+/// \brief Decodes the Index field
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_INDEX_DECODER_H
+#define LZMA_INDEX_DECODER_H
+
+#include "common.h"
+#include "index.h"
+
+
+extern lzma_ret lzma_index_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ lzma_index **i, uint64_t memlimit);
+
+
+#endif
diff --git a/src/liblzma/common/index_encoder.c b/src/liblzma/common/index_encoder.c
new file mode 100644
index 0000000..204490c
--- /dev/null
+++ b/src/liblzma/common/index_encoder.c
@@ -0,0 +1,263 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file index_encoder.c
+/// \brief Encodes the Index field
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "index_encoder.h"
+#include "index.h"
+#include "check.h"
+
+
+typedef struct {
+ enum {
+ SEQ_INDICATOR, // Write the Index Indicator byte
+ SEQ_COUNT, // Encode Number of Records
+ SEQ_UNPADDED, // Encode the Unpadded Size of the current Record
+ SEQ_UNCOMPRESSED, // Encode the Uncompressed Size of the current Record
+ SEQ_NEXT, // Move the iterator to the next Record, if any
+ SEQ_PADDING, // Write the Index Padding bytes
+ SEQ_CRC32, // Write the CRC32 field
+ } sequence;
+
+ /// Index being encoded
+ const lzma_index *index;
+
+ /// Iterator for the Index being encoded
+ lzma_index_iter iter;
+
+ /// Position in integers; reused as padding bytes left and CRC32 byte index
+ size_t pos;
+
+ /// CRC32 of the Index bytes written so far (all fields before CRC32)
+ uint32_t crc32;
+} lzma_index_coder;
+
+/**
+ * Encode the Index field into out[], possibly over several calls.
+ *
+ * Output is written starting at *out_pos and *out_pos is advanced.
+ * Returns LZMA_OK when the output buffer became full before the Index
+ * was finished and LZMA_STREAM_END once the last byte of the CRC32 has
+ * been written. The allocator, in, in_pos, in_size, and action
+ * arguments are unused.
+ */
+static lzma_ret
+index_encode(void *coder_ptr,
+ const lzma_allocator *allocator lzma_attribute((__unused__)),
+ const uint8_t *restrict in lzma_attribute((__unused__)),
+ size_t *restrict in_pos lzma_attribute((__unused__)),
+ size_t in_size lzma_attribute((__unused__)),
+ uint8_t *restrict out, size_t *restrict out_pos,
+ size_t out_size,
+ lzma_action action lzma_attribute((__unused__)))
+{
+ lzma_index_coder *coder = coder_ptr;
+
+ // Position where to start calculating CRC32. The idea is that we
+ // need to call lzma_crc32() only once per call to index_encode().
+ const size_t out_start = *out_pos;
+
+ // Return value to use if we return at the end of this function.
+ // We use "goto out" to jump out of the while-switch construct
+ // instead of returning directly, because that way we don't need
+ // to copypaste the lzma_crc32() call to many places.
+ lzma_ret ret = LZMA_OK;
+
+ while (*out_pos < out_size)
+ switch (coder->sequence) {
+ case SEQ_INDICATOR:
+ out[*out_pos] = INDEX_INDICATOR;
+ ++*out_pos;
+ coder->sequence = SEQ_COUNT;
+ break;
+
+ case SEQ_COUNT: {
+ const lzma_vli count = lzma_index_block_count(coder->index);
+ ret = lzma_vli_encode(count, &coder->pos,
+ out, out_pos, out_size);
+ if (ret != LZMA_STREAM_END)
+ goto out;
+
+ ret = LZMA_OK;
+ coder->pos = 0;
+ coder->sequence = SEQ_NEXT;
+ break;
+ }
+
+ case SEQ_NEXT:
+ if (lzma_index_iter_next(
+ &coder->iter, LZMA_INDEX_ITER_BLOCK)) { // true means that there are no more Blocks.
+ // Get the size of the Index Padding field.
+ coder->pos = lzma_index_padding_size(coder->index);
+ assert(coder->pos <= 3);
+ coder->sequence = SEQ_PADDING;
+ break;
+ }
+
+ coder->sequence = SEQ_UNPADDED;
+
+ // Fall through
+
+ case SEQ_UNPADDED:
+ case SEQ_UNCOMPRESSED: {
+ const lzma_vli size = coder->sequence == SEQ_UNPADDED
+ ? coder->iter.block.unpadded_size
+ : coder->iter.block.uncompressed_size;
+
+ ret = lzma_vli_encode(size, &coder->pos,
+ out, out_pos, out_size);
+ if (ret != LZMA_STREAM_END)
+ goto out;
+
+ ret = LZMA_OK;
+ coder->pos = 0;
+
+ // Advance to SEQ_UNCOMPRESSED or SEQ_NEXT. This relies on the
+ // order of the values in the sequence enumeration.
+ ++coder->sequence;
+ break;
+ }
+
+ case SEQ_PADDING:
+ if (coder->pos > 0) {
+ --coder->pos;
+ out[(*out_pos)++] = 0x00;
+ break;
+ }
+
+ // Finish the CRC32 calculation.
+ coder->crc32 = lzma_crc32(out + out_start,
+ *out_pos - out_start, coder->crc32);
+
+ coder->sequence = SEQ_CRC32;
+
+ // Fall through
+
+ case SEQ_CRC32:
+ // We don't use the main loop, because we don't want
+ // coder->crc32 to be touched anymore.
+ do { // The CRC32 is written least significant byte first.
+ if (*out_pos == out_size)
+ return LZMA_OK;
+
+ out[*out_pos] = (coder->crc32 >> (coder->pos * 8))
+ & 0xFF;
+ ++*out_pos;
+
+ } while (++coder->pos < 4);
+
+ return LZMA_STREAM_END;
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+out:
+ // Update the CRC32.
+ //
+ // Avoid null pointer + 0 (undefined behavior) in "out + out_start".
+ // In such a case we had no output space and thus out_used == 0.
+ {
+ const size_t out_used = *out_pos - out_start;
+ if (out_used > 0)
+ coder->crc32 = lzma_crc32(out + out_start,
+ out_used, coder->crc32);
+ }
+
+ return ret;
+}
+
+
+/// Free the Index encoder. The lzma_index being encoded is not owned by
+/// the coder (it is held via a const pointer) and is left untouched.
+static void
+index_encoder_end(void *coder, const lzma_allocator *allocator)
+{
+	lzma_free(coder, allocator);
+}
+
+
+/// Put coder into its initial state for encoding the lzma_index i.
+static void
+index_encoder_reset(lzma_index_coder *coder, const lzma_index *i)
+{
+	coder->crc32 = 0;
+	coder->pos = 0;
+	coder->index = i;
+	coder->sequence = SEQ_INDICATOR;
+
+	// The Records are read from i with an iterator.
+	lzma_index_iter_init(&coder->iter, i);
+}
+
+
+/// Initialize next as an encoder of the lzma_index i.
+///
+/// Returns LZMA_PROG_ERROR if i is NULL and LZMA_MEM_ERROR if memory
+/// allocation fails. An already allocated coder is reused.
+extern lzma_ret
+lzma_index_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+		const lzma_index *i)
+{
+	lzma_next_coder_init(&lzma_index_encoder_init, next, allocator);
+
+	if (i == NULL)
+		return LZMA_PROG_ERROR;
+
+	lzma_index_coder *coder = next->coder;
+	if (coder == NULL) {
+		coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
+		if (coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		next->coder = coder;
+		next->end = &index_encoder_end;
+		next->code = &index_encode;
+	}
+
+	index_encoder_reset(coder, i);
+
+	return LZMA_OK;
+}
+
+
+/// Initialize strm as an encoder of the lzma_index i.
+extern LZMA_API(lzma_ret)
+lzma_index_encoder(lzma_stream *strm, const lzma_index *i)
+{
+	lzma_next_strm_init(lzma_index_encoder_init, strm, i);
+
+	// The Index encoder accepts both LZMA_RUN and LZMA_FINISH.
+	strm->internal->supported_actions[LZMA_FINISH] = true;
+	strm->internal->supported_actions[LZMA_RUN] = true;
+
+	return LZMA_OK;
+}
+
+
+/// Encode the lzma_index i into a buffer with a single call.
+///
+/// Returns LZMA_PROG_ERROR for invalid arguments and LZMA_BUF_ERROR if
+/// the space left in out[] is smaller than lzma_index_size(i). On success
+/// *out_pos has been advanced past the encoded Index.
+extern LZMA_API(lzma_ret)
+lzma_index_buffer_encode(const lzma_index *i,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	// Validate the arguments.
+	if (i == NULL || out == NULL || out_pos == NULL || *out_pos > out_size)
+		return LZMA_PROG_ERROR;
+
+	// Give up early if the whole Index cannot fit.
+	if (out_size - *out_pos < lzma_index_size(i))
+		return LZMA_BUF_ERROR;
+
+	// The encoder state is one small structure, so it can be kept
+	// on the stack.
+	lzma_index_coder coder;
+	index_encoder_reset(&coder, i);
+
+	// Encoding should never fail, but remember the original *out_pos
+	// just in case.
+	const size_t out_start = *out_pos;
+	const lzma_ret ret = index_encode(&coder, NULL, NULL, NULL, 0,
+			out, out_pos, out_size, LZMA_RUN);
+
+	if (ret == LZMA_STREAM_END)
+		return LZMA_OK;
+
+	// We should never get here. If we do and debugging isn't enabled,
+	// restore the output position and report a programming error.
+	assert(0);
+	*out_pos = out_start;
+	return LZMA_PROG_ERROR;
+}
diff --git a/src/liblzma/common/index_encoder.h b/src/liblzma/common/index_encoder.h
new file mode 100644
index 0000000..4d55cd1
--- /dev/null
+++ b/src/liblzma/common/index_encoder.h
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file index_encoder.h
+/// \brief Encodes the Index field
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_INDEX_ENCODER_H
+#define LZMA_INDEX_ENCODER_H
+
+#include "common.h"
+
+
+extern lzma_ret lzma_index_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator, const lzma_index *i);
+
+
+#endif
diff --git a/src/liblzma/common/index_hash.c b/src/liblzma/common/index_hash.c
new file mode 100644
index 0000000..52c3d65
--- /dev/null
+++ b/src/liblzma/common/index_hash.c
@@ -0,0 +1,343 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file index_hash.c
+/// \brief Validates Index by using a hash function
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "index.h"
+#include "check.h"
+
+
+typedef struct { // Running totals for one side of the comparison (see the blocks and records members of struct lzma_index_hash_s)
+ /// Sum of the Block sizes (including Block Padding); hash_append() adds vli_ceil4(unpadded_size) per Record
+ lzma_vli blocks_size;
+
+ /// Sum of the Uncompressed Size fields
+ lzma_vli uncompressed_size;
+
+ /// Number of Records
+ lzma_vli count;
+
+ /// Size of the List of Index Records as bytes, i.e. the sum of lzma_vli_size() of both fields of every Record
+ lzma_vli index_list_size;
+
+ /// Check calculated from Unpadded Sizes and Uncompressed Sizes (type LZMA_CHECK_BEST).
+ lzma_check_state check;
+
+} lzma_index_hash_info;
+
+
+struct lzma_index_hash_s { // State shared by lzma_index_hash_append() and lzma_index_hash_decode()
+ enum { // Decoder position; lzma_index_hash_append() is accepted only while this is SEQ_BLOCK
+ SEQ_BLOCK,
+ SEQ_COUNT,
+ SEQ_UNPADDED,
+ SEQ_UNCOMPRESSED,
+ SEQ_PADDING_INIT,
+ SEQ_PADDING,
+ SEQ_CRC32,
+ } sequence;
+
+ /// Information collected while decoding the actual Blocks.
+ lzma_index_hash_info blocks;
+
+ /// Information collected from the Index field.
+ lzma_index_hash_info records;
+
+ /// Number of Records not fully decoded (read from the Index, then decremented once per decoded Record)
+ lzma_vli remaining;
+
+ /// Unpadded Size currently being read from an Index Record.
+ lzma_vli unpadded_size;
+
+ /// Uncompressed Size currently being read from an Index Record.
+ lzma_vli uncompressed_size;
+
+ /// Position in variable-length integers when decoding them from
+ /// the List of Records. Also reused as the count of Index Padding bytes left and as the CRC32 byte index.
+ size_t pos;
+
+ /// CRC32 of the Index bytes consumed so far by lzma_index_hash_decode()
+ uint32_t crc32;
+};
+
+
+extern LZMA_API(lzma_index_hash *) // Returns the (re)initialized structure, or NULL if a needed allocation fails.
+lzma_index_hash_init(lzma_index_hash *index_hash,
+ const lzma_allocator *allocator)
+{
+ if (index_hash == NULL) { // Nothing to reuse: allocate a new structure.
+ index_hash = lzma_alloc(sizeof(lzma_index_hash), allocator);
+ if (index_hash == NULL)
+ return NULL;
+ }
+
+ index_hash->sequence = SEQ_BLOCK; // Start in the state where Blocks may be appended.
+ index_hash->blocks.blocks_size = 0;
+ index_hash->blocks.uncompressed_size = 0;
+ index_hash->blocks.count = 0;
+ index_hash->blocks.index_list_size = 0;
+ index_hash->records.blocks_size = 0;
+ index_hash->records.uncompressed_size = 0;
+ index_hash->records.count = 0;
+ index_hash->records.index_list_size = 0;
+ index_hash->unpadded_size = 0;
+ index_hash->uncompressed_size = 0;
+ index_hash->pos = 0;
+ index_hash->crc32 = 0;
+
+ // These cannot fail because LZMA_CHECK_BEST is known to be supported.
+ (void)lzma_check_init(&index_hash->blocks.check, LZMA_CHECK_BEST);
+ (void)lzma_check_init(&index_hash->records.check, LZMA_CHECK_BEST);
+
+ return index_hash;
+}
+
+
+extern LZMA_API(void) // Releases index_hash by passing it to lzma_free() with the given allocator.
+lzma_index_hash_end(lzma_index_hash *index_hash,
+ const lzma_allocator *allocator)
+{
+ lzma_free(index_hash, allocator);
+ return;
+}
+
+
+extern LZMA_API(lzma_vli) // Returns index_size() computed from the Blocks appended so far.
+lzma_index_hash_size(const lzma_index_hash *index_hash) // NOTE: index_hash is dereferenced without a NULL check.
+{
+ // Get the size of the Index from ->blocks instead of ->records for
+ // cases where application wants to know the Index Size before
+ // decoding the Index.
+ return index_size(index_hash->blocks.count,
+ index_hash->blocks.index_list_size);
+}
+
+
+/// Updates the sizes and the hash without any validation; both callers check the resulting totals afterwards.
+static void
+hash_append(lzma_index_hash_info *info, lzma_vli unpadded_size,
+ lzma_vli uncompressed_size)
+{
+ info->blocks_size += vli_ceil4(unpadded_size); // blocks_size is documented as including Block Padding
+ info->uncompressed_size += uncompressed_size;
+ info->index_list_size += lzma_vli_size(unpadded_size)
+ + lzma_vli_size(uncompressed_size);
+ ++info->count;
+
+ const lzma_vli sizes[2] = { unpadded_size, uncompressed_size }; // The in-memory bytes of this pair are fed to the check.
+ lzma_check_update(&info->check, LZMA_CHECK_BEST,
+ (const uint8_t *)(sizes), sizeof(sizes));
+
+ return;
+}
+
+
+extern LZMA_API(lzma_ret) // LZMA_OK, LZMA_PROG_ERROR (bad arguments or wrong state) or LZMA_DATA_ERROR (a limit was exceeded).
+lzma_index_hash_append(lzma_index_hash *index_hash, lzma_vli unpadded_size,
+ lzma_vli uncompressed_size)
+{
+ // Validate the arguments. Appending is accepted only while the state is still SEQ_BLOCK.
+ if (index_hash == NULL || index_hash->sequence != SEQ_BLOCK
+ || unpadded_size < UNPADDED_SIZE_MIN
+ || unpadded_size > UNPADDED_SIZE_MAX
+ || uncompressed_size > LZMA_VLI_MAX)
+ return LZMA_PROG_ERROR;
+
+ // Update the hash.
+ hash_append(&index_hash->blocks, unpadded_size, uncompressed_size);
+
+ // Validate that the totals in index_hash->blocks are still within the allowed limits.
+ if (index_hash->blocks.blocks_size > LZMA_VLI_MAX
+ || index_hash->blocks.uncompressed_size > LZMA_VLI_MAX
+ || index_size(index_hash->blocks.count,
+ index_hash->blocks.index_list_size)
+ > LZMA_BACKWARD_SIZE_MAX
+ || index_stream_size(index_hash->blocks.blocks_size,
+ index_hash->blocks.count,
+ index_hash->blocks.index_list_size)
+ > LZMA_VLI_MAX)
+ return LZMA_DATA_ERROR;
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret) // LZMA_OK = more input needed, LZMA_STREAM_END = Index (incl. CRC32) matched the Blocks, otherwise an error code.
+lzma_index_hash_decode(lzma_index_hash *index_hash, const uint8_t *in,
+ size_t *in_pos, size_t in_size)
+{
+ // Catch zero input buffer here, because in contrast to Index encoder
+ // and decoder functions, applications call this function directly
+ // instead of via lzma_code(), which does the buffer checking.
+ if (*in_pos >= in_size)
+ return LZMA_BUF_ERROR;
+
+ // NOTE: This function has many similarities to index_encode() and
+ // index_decode() functions found from index_encoder.c and
+ // index_decoder.c. See the comments especially in index_encoder.c.
+ const size_t in_start = *in_pos; // Start of the bytes that still need to be added to the CRC32.
+ lzma_ret ret = LZMA_OK;
+
+ while (*in_pos < in_size)
+ switch (index_hash->sequence) {
+ case SEQ_BLOCK:
+ // Check the Index Indicator is present.
+ if (in[(*in_pos)++] != INDEX_INDICATOR)
+ return LZMA_DATA_ERROR;
+
+ index_hash->sequence = SEQ_COUNT;
+ break;
+
+ case SEQ_COUNT: {
+ ret = lzma_vli_decode(&index_hash->remaining,
+ &index_hash->pos, in, in_pos, in_size);
+ if (ret != LZMA_STREAM_END) // Integer not complete (or decoding failed): update the CRC32 and return ret.
+ goto out;
+
+ // The count must match the count of the Blocks decoded.
+ if (index_hash->remaining != index_hash->blocks.count)
+ return LZMA_DATA_ERROR;
+
+ ret = LZMA_OK;
+ index_hash->pos = 0;
+
+ // Handle the special case when there are no Blocks.
+ index_hash->sequence = index_hash->remaining == 0
+ ? SEQ_PADDING_INIT : SEQ_UNPADDED;
+ break;
+ }
+
+ case SEQ_UNPADDED:
+ case SEQ_UNCOMPRESSED: {
+ lzma_vli *size = index_hash->sequence == SEQ_UNPADDED
+ ? &index_hash->unpadded_size
+ : &index_hash->uncompressed_size;
+
+ ret = lzma_vli_decode(size, &index_hash->pos,
+ in, in_pos, in_size);
+ if (ret != LZMA_STREAM_END) // Same handling as in SEQ_COUNT.
+ goto out;
+
+ ret = LZMA_OK;
+ index_hash->pos = 0;
+
+ if (index_hash->sequence == SEQ_UNPADDED) {
+ if (index_hash->unpadded_size < UNPADDED_SIZE_MIN
+ || index_hash->unpadded_size
+ > UNPADDED_SIZE_MAX)
+ return LZMA_DATA_ERROR;
+
+ index_hash->sequence = SEQ_UNCOMPRESSED;
+ } else {
+ // Update the hash.
+ hash_append(&index_hash->records,
+ index_hash->unpadded_size,
+ index_hash->uncompressed_size);
+
+ // Verify that we don't go over the known sizes. Note
+ // that this validation is simpler than the one used
+ // in lzma_index_hash_append(), because here we know
+ // that values in index_hash->blocks are already
+ // validated and we are fine as long as we don't
+ // exceed them in index_hash->records.
+ if (index_hash->blocks.blocks_size
+ < index_hash->records.blocks_size
+ || index_hash->blocks.uncompressed_size
+ < index_hash->records.uncompressed_size
+ || index_hash->blocks.index_list_size
+ < index_hash->records.index_list_size)
+ return LZMA_DATA_ERROR;
+
+ // Check if this was the last Record.
+ index_hash->sequence = --index_hash->remaining == 0
+ ? SEQ_PADDING_INIT : SEQ_UNPADDED;
+ }
+
+ break;
+ }
+
+ case SEQ_PADDING_INIT:
+ index_hash->pos = (LZMA_VLI_C(4) - index_size_unpadded(
+ index_hash->records.count,
+ index_hash->records.index_list_size)) & 3;
+ index_hash->sequence = SEQ_PADDING; // pos now holds the number of Index Padding bytes (0-3) still expected.
+
+ // Fall through
+
+ case SEQ_PADDING:
+ if (index_hash->pos > 0) {
+ --index_hash->pos;
+ if (in[(*in_pos)++] != 0x00) // Every padding byte must be zero.
+ return LZMA_DATA_ERROR;
+
+ break;
+ }
+
+ // Compare the sizes.
+ if (index_hash->blocks.blocks_size
+ != index_hash->records.blocks_size
+ || index_hash->blocks.uncompressed_size
+ != index_hash->records.uncompressed_size
+ || index_hash->blocks.index_list_size
+ != index_hash->records.index_list_size)
+ return LZMA_DATA_ERROR;
+
+ // Finish the hashes and compare them.
+ lzma_check_finish(&index_hash->blocks.check, LZMA_CHECK_BEST);
+ lzma_check_finish(&index_hash->records.check, LZMA_CHECK_BEST);
+ if (memcmp(index_hash->blocks.check.buffer.u8,
+ index_hash->records.check.buffer.u8,
+ lzma_check_size(LZMA_CHECK_BEST)) != 0)
+ return LZMA_DATA_ERROR;
+
+ // Finish the CRC32 calculation with the bytes consumed so far during this call.
+ index_hash->crc32 = lzma_crc32(in + in_start,
+ *in_pos - in_start, index_hash->crc32);
+
+ index_hash->sequence = SEQ_CRC32; // pos is 0 here, so it can serve as the CRC32 byte index below.
+
+ // Fall through
+
+ case SEQ_CRC32:
+ do {
+ if (*in_pos == in_size)
+ return LZMA_OK;
+
+ if (((index_hash->crc32 >> (index_hash->pos * 8)) // Byte number pos of the CRC32, least significant byte first.
+ & 0xFF) != in[(*in_pos)++]) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ return LZMA_DATA_ERROR; // Compiled out in fuzzing builds, where a CRC32 mismatch is tolerated.
+#endif
+ }
+
+ } while (++index_hash->pos < 4);
+
+ return LZMA_STREAM_END;
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+out:
+ // Update the CRC32.
+ //
+ // Avoid null pointer + 0 (undefined behavior) in "in + in_start".
+ // In such a case we had no input and thus in_used == 0.
+ {
+ const size_t in_used = *in_pos - in_start;
+ if (in_used > 0)
+ index_hash->crc32 = lzma_crc32(in + in_start,
+ in_used, index_hash->crc32);
+ }
+
+ return ret;
+}
diff --git a/src/liblzma/common/lzip_decoder.c b/src/liblzma/common/lzip_decoder.c
new file mode 100644
index 0000000..88cc7ff
--- /dev/null
+++ b/src/liblzma/common/lzip_decoder.c
@@ -0,0 +1,418 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lzip_decoder.c
+/// \brief Decodes .lz (lzip) files
+//
+// Author: Michał Górny
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzip_decoder.h"
+#include "lzma_decoder.h"
+#include "check.h"
+
+
+// .lz format version 0 lacks the 64-bit Member size field in the footer. Footer as read by lzip_decode(): CRC32 (4 bytes), Data size (8) and, in version 1, Member size (8).
+#define LZIP_V0_FOOTER_SIZE 12
+#define LZIP_V1_FOOTER_SIZE 20
+#define LZIP_FOOTER_SIZE_MAX LZIP_V1_FOOTER_SIZE
+
+// lc/lp/pb are hardcoded in the .lz format. They are copied into coder->options before the LZMA1 decoder is initialized.
+#define LZIP_LC 3
+#define LZIP_LP 0
+#define LZIP_PB 2
+
+
+typedef struct { // State of the .lz decoder; allocated and reset by lzma_lzip_decoder_init()
+ enum { // Decoding stages, listed in the order lzip_decode() goes through them for each member
+ SEQ_ID_STRING,
+ SEQ_VERSION,
+ SEQ_DICT_SIZE,
+ SEQ_CODER_INIT,
+ SEQ_LZMA_STREAM,
+ SEQ_MEMBER_FOOTER,
+ } sequence;
+
+ /// .lz member format version
+ uint32_t version;
+
+ /// CRC32 of the uncompressed data in the .lz member
+ uint32_t crc32;
+
+ /// Uncompressed size of the .lz member
+ uint64_t uncompressed_size;
+
+ /// Compressed size of the .lz member, counting the header and footer bytes too
+ uint64_t member_size;
+
+ /// Memory usage limit (never 0: the initializer stores my_max(1, memlimit))
+ uint64_t memlimit;
+
+ /// Amount of memory actually needed (LZMA_MEMUSAGE_BASE until the dictionary size has been decoded)
+ uint64_t memusage;
+
+ /// If true, LZMA_GET_CHECK is returned after decoding the header
+ /// fields. As all files use CRC32 this is redundant but it's
+ /// implemented anyway since the initialization function supports
+ /// all other flags in addition to LZMA_TELL_ANY_CHECK.
+ bool tell_any_check;
+
+ /// If true, we won't calculate or verify the CRC32 of
+ /// the uncompressed data.
+ bool ignore_check;
+
+ /// If true, we will decode concatenated .lz members and stop if
+ /// non-.lz data is seen after at least one member has been
+ /// successfully decoded.
+ bool concatenated;
+
+ /// When decoding concatenated .lz members, this is true as long as
+ /// we are decoding the first .lz member. This is needed to avoid
+ /// incorrect LZMA_FORMAT_ERROR in case there is non-.lz data at
+ /// the end of the file.
+ bool first_member;
+
+ /// Reading position in the header and footer fields
+ size_t pos;
+
+ /// Buffer to hold the .lz footer fields
+ uint8_t buffer[LZIP_FOOTER_SIZE_MAX];
+
+ /// Options decoded from the .lz header that are needed to initialize
+ /// the LZMA1 decoder.
+ lzma_options_lzma options;
+
+ /// LZMA1 decoder
+ lzma_next_coder lzma_decoder;
+
+} lzma_lzip_coder;
+
+
+static lzma_ret // Decodes .lz members as a state machine driven by coder->sequence.
+lzip_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_lzip_coder *coder = coder_ptr;
+
+ while (true)
+ switch (coder->sequence) {
+ case SEQ_ID_STRING: {
+ // The "ID string" or magic bytes are "LZIP" in US-ASCII.
+ const uint8_t lzip_id_string[4] = { 0x4C, 0x5A, 0x49, 0x50 };
+
+ while (coder->pos < sizeof(lzip_id_string)) {
+ if (*in_pos >= in_size) {
+ // If we are on the 2nd+ concatenated member
+ // and the input ends before we can read
+ // the magic bytes, we discard the bytes that
+ // were already read (up to 3) and finish.
+ // See the reasoning below.
+ return !coder->first_member
+ && action == LZMA_FINISH
+ ? LZMA_STREAM_END : LZMA_OK;
+ }
+
+ if (in[*in_pos] != lzip_id_string[coder->pos]) {
+ // The .lz format allows putting non-.lz data
+ // at the end of the file. If we have seen
+ // at least one valid .lz member already,
+ // then we won't consume the byte at *in_pos
+ // and will return LZMA_STREAM_END. This way
+ // apps can easily locate and read the non-.lz
+ // data after the .lz member(s).
+ //
+ // NOTE: If the first 1-3 bytes of the non-.lz
+ // data match the .lz ID string then the first
+ // 1-3 bytes of the junk will get ignored by
+ // us. If apps want to properly locate the
+ // trailing data they must ensure that the
+ // first byte of their custom data isn't the
+ // same as the first byte of .lz ID string.
+ // With the liblzma API we cannot rewind the
+ // input position across calls to lzma_code().
+ return !coder->first_member
+ ? LZMA_STREAM_END : LZMA_FORMAT_ERROR;
+ }
+
+ ++*in_pos;
+ ++coder->pos;
+ }
+
+ coder->pos = 0;
+
+ coder->crc32 = 0; // Reset the per-member totals for the member that starts here.
+ coder->uncompressed_size = 0;
+ coder->member_size = sizeof(lzip_id_string); // The ID string bytes count towards the Member size.
+
+ coder->sequence = SEQ_VERSION;
+ }
+
+ // Fall through
+
+ case SEQ_VERSION:
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ coder->version = in[(*in_pos)++];
+
+ // We support version 0 and unextended version 1.
+ if (coder->version > 1)
+ return LZMA_OPTIONS_ERROR;
+
+ ++coder->member_size;
+ coder->sequence = SEQ_DICT_SIZE;
+
+ // .lz versions 0 and 1 use CRC32 as the integrity check
+ // so if the application wanted to know that
+ // (LZMA_TELL_ANY_CHECK) we can tell it now.
+ if (coder->tell_any_check)
+ return LZMA_GET_CHECK; // sequence is already SEQ_DICT_SIZE, so the next call continues from there.
+
+ // Fall through
+
+ case SEQ_DICT_SIZE: {
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ const uint32_t ds = in[(*in_pos)++];
+ ++coder->member_size;
+
+ // The five lowest bits are for the base-2 logarithm of
+ // the dictionary size and the highest three bits are
+ // the fractional part (0/16 to 7/16) that will be
+ // subtracted to get the final value.
+ //
+ // For example, with 0xB5:
+ // b2log = 21
+ // fracnum = 5
+ // dict_size = 2^21 - 2^21 * 5 / 16 = 1408 KiB
+ const uint32_t b2log = ds & 0x1F;
+ const uint32_t fracnum = ds >> 5;
+
+ // The format versions 0 and 1 allow dictionary size in the
+ // range [4 KiB, 512 MiB].
+ if (b2log < 12 || b2log > 29 || (b2log == 12 && fracnum > 0))
+ return LZMA_DATA_ERROR;
+
+ // 2^[b2log] - 2^[b2log] * [fracnum] / 16
+ // = 2^[b2log] - [fracnum] * 2^([b2log] - 4)
+ coder->options.dict_size = (UINT32_C(1) << b2log)
+ - (fracnum << (b2log - 4));
+
+ assert(coder->options.dict_size >= 4096);
+ assert(coder->options.dict_size <= (UINT32_C(512) << 20));
+
+ coder->options.preset_dict = NULL;
+ coder->options.lc = LZIP_LC;
+ coder->options.lp = LZIP_LP;
+ coder->options.pb = LZIP_PB;
+
+ // Calculate the memory usage.
+ coder->memusage = lzma_lzma_decoder_memusage(&coder->options)
+ + LZMA_MEMUSAGE_BASE;
+
+ // Initialization is a separate step because if we return
+ // LZMA_MEMLIMIT_ERROR we need to be able to restart after
+ // the memlimit has been increased.
+ coder->sequence = SEQ_CODER_INIT;
+ }
+
+ // Fall through
+
+ case SEQ_CODER_INIT: {
+ if (coder->memusage > coder->memlimit)
+ return LZMA_MEMLIMIT_ERROR;
+
+ const lzma_filter_info filters[2] = {
+ {
+ .id = LZMA_FILTER_LZMA1,
+ .init = &lzma_lzma_decoder_init,
+ .options = &coder->options,
+ }, {
+ .init = NULL,
+ }
+ };
+
+ return_if_error(lzma_next_filter_init(&coder->lzma_decoder,
+ allocator, filters));
+
+ coder->crc32 = 0; // Was already zeroed in SEQ_ID_STRING; it is reset again here.
+ coder->sequence = SEQ_LZMA_STREAM;
+ }
+
+ // Fall through
+
+ case SEQ_LZMA_STREAM: {
+ const size_t in_start = *in_pos;
+ const size_t out_start = *out_pos;
+
+ const lzma_ret ret = coder->lzma_decoder.code(
+ coder->lzma_decoder.coder, allocator,
+ in, in_pos, in_size, out, out_pos, out_size,
+ action);
+
+ const size_t out_used = *out_pos - out_start;
+
+ coder->member_size += *in_pos - in_start;
+ coder->uncompressed_size += out_used;
+
+ // Don't update the CRC32 if the integrity check will be
+ // ignored or if there was no new output. The latter is
+ // important in case out == NULL to avoid null pointer + 0
+ // which is undefined behavior.
+ if (!coder->ignore_check && out_used > 0)
+ coder->crc32 = lzma_crc32(out + out_start, out_used,
+ coder->crc32);
+
+ if (ret != LZMA_STREAM_END) // Anything else (LZMA_OK or an error) is passed to the caller as is.
+ return ret;
+
+ coder->sequence = SEQ_MEMBER_FOOTER;
+ }
+
+ // Fall through
+
+ case SEQ_MEMBER_FOOTER: {
+ // The footer of .lz version 0 lacks the Member size field.
+ // This is the only difference between version 0 and
+ // unextended version 1 formats.
+ const size_t footer_size = coder->version == 0
+ ? LZIP_V0_FOOTER_SIZE
+ : LZIP_V1_FOOTER_SIZE;
+
+ // Copy the CRC32, Data size, and Member size fields to
+ // the internal buffer.
+ lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+ footer_size);
+
+ // Return if we didn't get the whole footer yet.
+ if (coder->pos < footer_size)
+ return LZMA_OK;
+
+ coder->pos = 0;
+ coder->member_size += footer_size;
+
+ // Check that the footer fields match the observed data.
+ if (!coder->ignore_check
+ && coder->crc32 != read32le(&coder->buffer[0]))
+ return LZMA_DATA_ERROR;
+
+ if (coder->uncompressed_size != read64le(&coder->buffer[4]))
+ return LZMA_DATA_ERROR;
+
+ if (coder->version > 0) {
+ // .lz version 0 has no Member size field.
+ if (coder->member_size != read64le(&coder->buffer[12]))
+ return LZMA_DATA_ERROR;
+ }
+
+ // Decoding is finished if we weren't requested to decode
+ // more than one .lz member.
+ if (!coder->concatenated)
+ return LZMA_STREAM_END;
+
+ coder->first_member = false;
+ coder->sequence = SEQ_ID_STRING;
+ break; // Back to the while-loop to look for the ID string of the next member.
+ }
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ // Never reached
+}
+
+
+static void // Ends the nested LZMA1 decoder first, then frees the coder itself.
+lzip_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_lzip_coder *coder = coder_ptr;
+ lzma_next_end(&coder->lzma_decoder, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_check // Always reports CRC32; the coder pointer is not used.
+lzip_decoder_get_check(const void *coder_ptr lzma_attribute((__unused__)))
+{
+ return LZMA_CHECK_CRC32;
+}
+
+
+static lzma_ret // Reports the current memusage and memlimit and optionally stores a new limit.
+lzip_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
+ uint64_t *old_memlimit, uint64_t new_memlimit)
+{
+ lzma_lzip_coder *coder = coder_ptr;
+
+ *memusage = coder->memusage;
+ *old_memlimit = coder->memlimit;
+
+ if (new_memlimit != 0) { // With 0 the limit is left unchanged and only the current values are reported.
+ if (new_memlimit < coder->memusage) // Refuse a limit below the recorded memory usage.
+ return LZMA_MEMLIMIT_ERROR;
+
+ coder->memlimit = new_memlimit;
+ }
+
+ return LZMA_OK;
+}
+
+
+extern lzma_ret // Returns LZMA_OPTIONS_ERROR for unsupported flags and LZMA_MEM_ERROR if the coder cannot be allocated.
+lzma_lzip_decoder_init(
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t memlimit, uint32_t flags)
+{
+ lzma_next_coder_init(&lzma_lzip_decoder_init, next, allocator);
+
+ if (flags & ~LZMA_SUPPORTED_FLAGS)
+ return LZMA_OPTIONS_ERROR;
+
+ lzma_lzip_coder *coder = next->coder;
+ if (coder == NULL) { // No coder yet: allocate it and install the callbacks.
+ coder = lzma_alloc(sizeof(lzma_lzip_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &lzip_decode;
+ next->end = &lzip_decoder_end;
+ next->get_check = &lzip_decoder_get_check;
+ next->memconfig = &lzip_decoder_memconfig;
+
+ coder->lzma_decoder = LZMA_NEXT_CODER_INIT;
+ }
+
+ coder->sequence = SEQ_ID_STRING;
+ coder->memlimit = my_max(1, memlimit); // A memlimit of 0 is stored as 1.
+ coder->memusage = LZMA_MEMUSAGE_BASE; // Recomputed in lzip_decode() once the dictionary size is known.
+ coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
+ coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
+ coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
+ coder->first_member = true;
+ coder->pos = 0;
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret) // Public entry point: initializes strm with the .lz decoder.
+lzma_lzip_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
+{
+ lzma_next_strm_init(lzma_lzip_decoder_init, strm, memlimit, flags);
+
+ strm->internal->supported_actions[LZMA_RUN] = true; // Only LZMA_RUN and LZMA_FINISH are enabled here.
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/lzip_decoder.h b/src/liblzma/common/lzip_decoder.h
new file mode 100644
index 0000000..33a01c3
--- /dev/null
+++ b/src/liblzma/common/lzip_decoder.h
@@ -0,0 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file lzip_decoder.h
+/// \brief Decodes .lz (lzip) files
+//
+// Author: Michał Górny
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_LZIP_DECODER_H
+#define LZMA_LZIP_DECODER_H
+
+#include "common.h"
+
+extern lzma_ret lzma_lzip_decoder_init( // Internal initializer of the .lz decoder (defined in lzip_decoder.c)
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t memlimit, uint32_t flags);
+
+#endif
diff --git a/src/liblzma/common/memcmplen.h b/src/liblzma/common/memcmplen.h
new file mode 100644
index 0000000..99d9c51
--- /dev/null
+++ b/src/liblzma/common/memcmplen.h
@@ -0,0 +1,173 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file memcmplen.h
+/// \brief Optimized comparison of two buffers
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_MEMCMPLEN_H
+#define LZMA_MEMCMPLEN_H
+
+#include "common.h"
+
+#ifdef HAVE_IMMINTRIN_H
+# include <immintrin.h>
+#endif
+
+// Only include <intrin.h> if it is needed. The header is only needed
+// on Windows when using an MSVC compatible compiler. The Intel compiler
+// can use the intrinsics without the header file.
+#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
+ && defined(_MSC_VER) \
+ && defined(_M_X64) \
+ && !defined(__INTEL_COMPILER)
+# include <intrin.h>
+#endif
+
+
+/// Find out how many equal bytes the two buffers have.
+///
+/// \param buf1 First buffer
+/// \param buf2 Second buffer
+/// \param len How many bytes have already been compared and will
+/// be assumed to match
+/// \param limit How many bytes to compare at most, including the
+/// already-compared bytes. This must be significantly
+/// smaller than UINT32_MAX to avoid integer overflows.
+/// Up to LZMA_MEMCMPLEN_EXTRA bytes may be read past
+/// the specified limit from both buf1 and buf2.
+///
+/// \return Number of equal bytes in the buffers is returned.
+/// This is always at least len and at most limit.
+///
+/// \note LZMA_MEMCMPLEN_EXTRA defines how many extra bytes may be read.
+/// It's rounded up to 2^n. This extra amount needs to be
+/// allocated in the buffers being used. It needs to be
+/// initialized too to keep Valgrind quiet.
+static lzma_always_inline uint32_t // Exactly one of the preprocessor-selected variants below forms the body.
+lzma_memcmplen(const uint8_t *buf1, const uint8_t *buf2,
+ uint32_t len, uint32_t limit)
+{
+ assert(len <= limit);
+ assert(limit <= UINT32_MAX / 2);
+
+#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
+ && ((TUKLIB_GNUC_REQ(3, 4) && defined(__x86_64__)) \
+ || (defined(__INTEL_COMPILER) && defined(__x86_64__)) \
+ || (defined(__INTEL_COMPILER) && defined(_M_X64)) \
+ || (defined(_MSC_VER) && defined(_M_X64)))
+ // I keep this x86-64 only for now since that's where I know this
+ // to be a good method. This may be fine on other 64-bit CPUs too.
+ // On big endian one should use xor instead of subtraction and switch
+ // to __builtin_clzll().
+#define LZMA_MEMCMPLEN_EXTRA 8
+ while (len < limit) {
+ const uint64_t x = read64ne(buf1 + len) - read64ne(buf2 + len); // Non-zero exactly when the two 8-byte words differ.
+ if (x != 0) {
+ // MSVC or Intel C compiler on Windows
+# if (defined(_MSC_VER) || defined(__INTEL_COMPILER)) && defined(_M_X64)
+ unsigned long tmp;
+ _BitScanForward64(&tmp, x);
+ len += (uint32_t)tmp >> 3;
+ // GCC, Clang, or Intel C compiler
+# else
+ len += (uint32_t)__builtin_ctzll(x) >> 3; // Trailing zero bits divided by 8 = number of equal low-order bytes.
+# endif
+ return my_min(len, limit); // Whole words are compared, so the mismatch may be found past limit.
+ }
+
+ len += 8;
+ }
+
+ return limit;
+
+#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
+ && defined(HAVE__MM_MOVEMASK_EPI8) \
+ && (defined(__SSE2__) \
+ || (defined(_MSC_VER) && defined(_M_IX86_FP) \
+ && _M_IX86_FP >= 2))
+ // NOTE: This will use 128-bit unaligned access which
+ // TUKLIB_FAST_UNALIGNED_ACCESS wasn't meant to permit,
+ // but it's convenient here since this is x86-only.
+ //
+ // SSE2 version for 32-bit and 64-bit x86. On x86-64 the above
+ // version is sometimes significantly faster and sometimes
+ // slightly slower than this SSE2 version, so this SSE2
+ // version isn't used on x86-64.
+# define LZMA_MEMCMPLEN_EXTRA 16
+ while (len < limit) {
+ const uint32_t x = 0xFFFF ^ (uint32_t)_mm_movemask_epi8(
+ _mm_cmpeq_epi8(
+ _mm_loadu_si128((const __m128i *)(buf1 + len)),
+ _mm_loadu_si128((const __m128i *)(buf2 + len))));
+
+ if (x != 0) {
+ len += ctz32(x); // x has one bit set for each differing byte among the 16 compared.
+ return my_min(len, limit);
+ }
+
+ len += 16;
+ }
+
+ return limit;
+
+#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && !defined(WORDS_BIGENDIAN)
+ // Generic 32-bit little endian method
+# define LZMA_MEMCMPLEN_EXTRA 4
+ while (len < limit) {
+ uint32_t x = read32ne(buf1 + len) - read32ne(buf2 + len);
+ if (x != 0) {
+ if ((x & 0xFFFF) == 0) { // The two lowest bytes are equal: skip them.
+ len += 2;
+ x >>= 16;
+ }
+
+ if ((x & 0xFF) == 0)
+ ++len;
+
+ return my_min(len, limit);
+ }
+
+ len += 4;
+ }
+
+ return limit;
+
+#elif defined(TUKLIB_FAST_UNALIGNED_ACCESS) && defined(WORDS_BIGENDIAN)
+ // Generic 32-bit big endian method
+# define LZMA_MEMCMPLEN_EXTRA 4
+ while (len < limit) {
+ uint32_t x = read32ne(buf1 + len) ^ read32ne(buf2 + len);
+ if (x != 0) {
+ if ((x & 0xFFFF0000) == 0) { // The two highest bytes are equal: skip them.
+ len += 2;
+ x <<= 16;
+ }
+
+ if ((x & 0xFF000000) == 0)
+ ++len;
+
+ return my_min(len, limit);
+ }
+
+ len += 4;
+ }
+
+ return limit;
+
+#else
+ // Simple portable version that doesn't use unaligned access.
+# define LZMA_MEMCMPLEN_EXTRA 0
+ while (len < limit && buf1[len] == buf2[len])
+ ++len;
+
+ return len;
+#endif
+}
+
+#endif
diff --git a/src/liblzma/common/microlzma_decoder.c b/src/liblzma/common/microlzma_decoder.c
new file mode 100644
index 0000000..e473373
--- /dev/null
+++ b/src/liblzma/common/microlzma_decoder.c
@@ -0,0 +1,221 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file microlzma_decoder.c
+/// \brief Decode MicroLZMA format
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_decoder.h"
+#include "lz_decoder.h"
+
+
+typedef struct { // State of the MicroLZMA decoder; filled in by microlzma_decoder_init()
+ /// LZMA1 decoder
+ lzma_next_coder lzma;
+
+ /// Compressed size of the stream as given by the application.
+ /// This must be exactly correct.
+ ///
+ /// This will be decremented when input is read.
+ uint64_t comp_size;
+
+ /// Uncompressed size of the stream as given by the application.
+ /// This may be less than the actual uncompressed size if
+ /// uncomp_size_is_exact is false.
+ ///
+ /// This will be decremented when output is produced (only when uncomp_size_is_exact is false).
+ lzma_vli uncomp_size;
+
+ /// LZMA dictionary size as given by the application
+ uint32_t dict_size;
+
+ /// If true, the exact uncompressed size is known. If false,
+ /// uncomp_size may be smaller than the real uncompressed size;
+ /// uncomp_size may never be bigger than the real uncompressed size.
+ bool uncomp_size_is_exact;
+
+ /// True once the first byte of the MicroLZMA stream
+ /// has been processed.
+ bool props_decoded;
+} lzma_microlzma_coder;
+
+
+static lzma_ret // Handles the bitwise-negated properties byte first, then forwards the rest to the LZMA1 decoder.
+microlzma_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_microlzma_coder *coder = coder_ptr;
+
+ // Remember the in start position so that we can update comp_size.
+ const size_t in_start = *in_pos;
+
+ // Remember the out start position so that we can update uncomp_size.
+ const size_t out_start = *out_pos;
+
+ // Limit the amount of input so that the decoder won't read more than
+ // comp_size. This is required when uncomp_size isn't exact because
+ // in that case the LZMA decoder will try to decode more input even
+ // when it has no output space (it can be looking for EOPM).
+ if (in_size - *in_pos > coder->comp_size)
+ in_size = *in_pos + (size_t)(coder->comp_size);
+
+ // When the exact uncompressed size isn't known, we must limit
+ // the available output space to prevent the LZMA decoder from
+ // trying to decode too much.
+ if (!coder->uncomp_size_is_exact
+ && out_size - *out_pos > coder->uncomp_size)
+ out_size = *out_pos + (size_t)(coder->uncomp_size);
+
+ if (!coder->props_decoded) {
+ // There must be at least one byte of input to decode
+ // the properties byte.
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ lzma_options_lzma options = {
+ .dict_size = coder->dict_size,
+ .preset_dict = NULL,
+ .preset_dict_size = 0,
+ .ext_flags = 0, // EOPM not allowed when size is known
+ .ext_size_low = UINT32_MAX, // Unknown size by default
+ .ext_size_high = UINT32_MAX,
+ };
+
+ if (coder->uncomp_size_is_exact)
+ lzma_set_ext_size(options, coder->uncomp_size);
+
+ // The properties are stored as bitwise-negation
+ // of the typical encoding.
+ if (lzma_lzma_lclppb_decode(&options, ~in[*in_pos]))
+ return LZMA_OPTIONS_ERROR;
+
+ ++*in_pos; // The properties byte is consumed; comp_size is adjusted after the main decoding call below.
+
+ // Initialize the decoder.
+ lzma_filter_info filters[2] = {
+ {
+ .id = LZMA_FILTER_LZMA1EXT,
+ .init = &lzma_lzma_decoder_init,
+ .options = &options,
+ }, {
+ .init = NULL,
+ }
+ };
+
+ return_if_error(lzma_next_filter_init(&coder->lzma,
+ allocator, filters));
+
+ // Pass one dummy 0x00 byte to the LZMA decoder since that
+ // is what it expects the first byte to be.
+ const uint8_t dummy_in = 0;
+ size_t dummy_in_pos = 0;
+ if (coder->lzma.code(coder->lzma.coder, allocator,
+ &dummy_in, &dummy_in_pos, 1,
+ out, out_pos, out_size, LZMA_RUN) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ assert(dummy_in_pos == 1);
+ coder->props_decoded = true;
+ }
+
+ // The rest is normal LZMA decoding.
+ lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator,
+ in, in_pos, in_size,
+ out, out_pos, out_size, action);
+
+ // Update the remaining compressed size (this includes the properties byte if it was read during this call).
+ assert(coder->comp_size >= *in_pos - in_start);
+ coder->comp_size -= *in_pos - in_start;
+
+ if (coder->uncomp_size_is_exact) {
+ // After successful decompression of the complete stream
+ // the compressed size must match.
+ if (ret == LZMA_STREAM_END && coder->comp_size != 0)
+ ret = LZMA_DATA_ERROR;
+ } else {
+ // Update the amount of output remaining.
+ assert(coder->uncomp_size >= *out_pos - out_start);
+ coder->uncomp_size -= *out_pos - out_start;
+
+ // - We must not get LZMA_STREAM_END because the stream
+ // shouldn't have EOPM.
+ // - We must use uncomp_size to determine when to
+ // return LZMA_STREAM_END.
+ if (ret == LZMA_STREAM_END)
+ ret = LZMA_DATA_ERROR;
+ else if (coder->uncomp_size == 0)
+ ret = LZMA_STREAM_END;
+ }
+
+ return ret;
+}
+
+
+static void // Ends the nested LZMA1 decoder first, then frees the coder itself.
+microlzma_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_microlzma_coder *coder = coder_ptr;
+ lzma_next_end(&coder->lzma, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_ret // Allocates the coder if needed and stores the stream parameters given by the application.
+microlzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t comp_size,
+ uint64_t uncomp_size, bool uncomp_size_is_exact,
+ uint32_t dict_size)
+{
+ lzma_next_coder_init(&microlzma_decoder_init, next, allocator);
+
+ lzma_microlzma_coder *coder = next->coder;
+
+ if (coder == NULL) { // No coder yet: allocate it and install the callbacks.
+ coder = lzma_alloc(sizeof(lzma_microlzma_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &microlzma_decode;
+ next->end = &microlzma_decoder_end;
+
+ coder->lzma = LZMA_NEXT_CODER_INIT;
+ }
+
+ // The public API is uint64_t but the internal LZ decoder API uses
+ // lzma_vli.
+ if (uncomp_size > LZMA_VLI_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ coder->comp_size = comp_size;
+ coder->uncomp_size = uncomp_size;
+ coder->uncomp_size_is_exact = uncomp_size_is_exact;
+ coder->dict_size = dict_size;
+
+ coder->props_decoded = false; // Makes microlzma_decode() handle the properties byte before anything else.
+
+ return LZMA_OK;
+}
+
+
+extern LZMA_API(lzma_ret) // Public entry point: initializes strm with the MicroLZMA decoder.
+lzma_microlzma_decoder(lzma_stream *strm, uint64_t comp_size,
+ uint64_t uncomp_size, lzma_bool uncomp_size_is_exact,
+ uint32_t dict_size)
+{
+ lzma_next_strm_init(microlzma_decoder_init, strm, comp_size,
+ uncomp_size, uncomp_size_is_exact, dict_size);
+
+ strm->internal->supported_actions[LZMA_RUN] = true; // Only LZMA_RUN and LZMA_FINISH are enabled here.
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/microlzma_encoder.c b/src/liblzma/common/microlzma_encoder.c
new file mode 100644
index 0000000..a787ca2
--- /dev/null
+++ b/src/liblzma/common/microlzma_encoder.c
@@ -0,0 +1,141 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file microlzma_encoder.c
+/// \brief Encode into MicroLZMA format
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "lzma_encoder.h"
+
+
+/// State of the MicroLZMA encoder wrapper.
+typedef struct {
+ /// LZMA1 encoder
+ lzma_next_coder lzma;
+
+ /// LZMA properties byte (lc/lp/pb). Its bitwise negation is written
+ /// as the first byte of the encoded output.
+ uint8_t props;
+} lzma_microlzma_coder;
+
+
+/// Encode a MicroLZMA stream into the free space of out[].
+///
+/// The embedded LZMA1 encoder is told to limit its output to the space
+/// available in out[]. When it finishes with LZMA_STREAM_END, the first
+/// output byte is replaced with the negated properties byte and *in_pos is
+/// set from the uncompressed size reported through set_out_limit().
+///
+/// \return LZMA_STREAM_END on success, LZMA_PROG_ERROR if setting the
+/// output limit fails or the encoder unexpectedly returns
+/// LZMA_OK, otherwise the error code from the LZMA1 encoder.
+static lzma_ret
+microlzma_encode(void *coder_ptr, const lzma_allocator *allocator,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size, uint8_t *restrict out,
+		size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+	lzma_microlzma_coder *const self = coder_ptr;
+
+	// Starting positions: the output one is where the properties byte
+	// will be stored, the input one is the base for the final *in_pos.
+	const size_t first_out = *out_pos;
+	const size_t first_in = *in_pos;
+
+	// Limit the output to the space that is available. The encoder is
+	// known to support set_out_limit() so LZMA_OPTIONS_ERROR isn't
+	// possible. LZMA_BUF_ERROR is possible, but lzma_code() has an
+	// assertion that doesn't allow it to be returned from here, so any
+	// failure is reported as LZMA_PROG_ERROR.
+	uint64_t uncomp_size;
+	const lzma_ret limit_ret = self->lzma.set_out_limit(
+			self->lzma.coder, &uncomp_size,
+			out_size - *out_pos);
+	if (limit_ret != LZMA_OK)
+		return LZMA_PROG_ERROR;
+
+	// set_out_limit fails if this isn't true.
+	assert(out_size - *out_pos >= 6);
+
+	// Let the LZMA1 encoder produce as much as fits.
+	const lzma_ret ret = self->lzma.code(self->lzma.coder, allocator,
+			in, in_pos, in_size, out, out_pos, out_size, action);
+
+	// LZMA_OK is not an expected result here.
+	if (ret == LZMA_OK) {
+		assert(0);
+		return LZMA_PROG_ERROR;
+	}
+
+	// Any other non-final result is passed through as is.
+	if (ret != LZMA_STREAM_END)
+		return ret;
+
+	// Store the bitwise negation of the properties byte as the first
+	// output byte. The space exists because both set_out_limit and
+	// the encoding itself succeeded.
+	out[first_out] = (uint8_t)(~self->props);
+
+	// The LZMA encoder likely read more input than it could encode,
+	// so derive *in_pos from uncomp_size instead.
+	assert(uncomp_size <= in_size - first_in);
+	*in_pos = first_in + (size_t)(uncomp_size);
+
+	return ret;
+}
+
+
+/// Tear down a MicroLZMA encoder: lzma_next_end() is called on the embedded
+/// coder first and then the wrapper structure is passed to lzma_free().
+static void
+microlzma_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+	lzma_microlzma_coder *const self = coder_ptr;
+
+	lzma_next_end(&self->lzma, allocator);
+	lzma_free(self, allocator);
+}
+
+
+/// Set up (or reuse) the MicroLZMA encoder wrapper stored in *next.
+///
+/// The wrapper is allocated only when next->coder is NULL. The properties
+/// byte is encoded from *options and a one-filter chain (LZMA1) is passed to
+/// lzma_next_filter_init().
+///
+/// \return LZMA_MEM_ERROR if allocating the wrapper fails,
+/// LZMA_OPTIONS_ERROR if the lc/lp/pb values cannot be encoded,
+/// otherwise the return value of lzma_next_filter_init().
+static lzma_ret
+microlzma_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+		const lzma_options_lzma *options)
+{
+	lzma_next_coder_init(&microlzma_encoder_init, next, allocator);
+
+	lzma_microlzma_coder *self = next->coder;
+
+	if (self == NULL) {
+		// First use: create the wrapper and hook up the callbacks.
+		self = lzma_alloc(sizeof(*self), allocator);
+		if (self == NULL)
+			return LZMA_MEM_ERROR;
+
+		self->lzma = LZMA_NEXT_CODER_INIT;
+
+		next->coder = self;
+		next->code = &microlzma_encode;
+		next->end = &microlzma_encoder_end;
+	}
+
+	// Encode the properties byte; its bitwise negation becomes the
+	// first output byte.
+	if (lzma_lzma_lclppb_encode(options, &self->props))
+		return LZMA_OPTIONS_ERROR;
+
+	// Filter chain: LZMA1 followed by the terminating entry.
+	const lzma_filter_info chain[2] = {
+		{
+			.id = LZMA_FILTER_LZMA1,
+			.init = &lzma_lzma_encoder_init,
+			.options = (void *)(options),
+		},
+		{
+			.init = NULL,
+		},
+	};
+
+	return lzma_next_filter_init(&self->lzma, allocator, chain);
+}
+
+
+/// Public entry point: initialize strm as a MicroLZMA encoder.
+///
+/// LZMA_FINISH is the only action that is marked as supported here.
+extern LZMA_API(lzma_ret)
+lzma_microlzma_encoder(lzma_stream *strm, const lzma_options_lzma *options)
+{
+	lzma_next_strm_init(microlzma_encoder_init, strm, options);
+
+	strm->internal->supported_actions[LZMA_FINISH] = true;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/outqueue.c b/src/liblzma/common/outqueue.c
new file mode 100644
index 0000000..71e8648
--- /dev/null
+++ b/src/liblzma/common/outqueue.c
@@ -0,0 +1,287 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file outqueue.c
+/// \brief Output queue handling in multithreaded coding
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "outqueue.h"
+
+
+/// Get the maximum number of buffers that may be allocated based
+/// on the number of threads. For now this is twice the number of threads.
+/// It's a compromise between RAM usage and keeping the worker threads busy
+/// when buffers finish out of order.
+///
+/// The argument is parenthesized and expanded exactly once.
+#define GET_BUFS_LIMIT(threads) (2 * (threads))
+
+
+/// Estimate the memory usage of an output queue.
+///
+/// \param buf_size_max Largest buffer payload size that will be used
+/// \param threads Number of worker threads
+///
+/// \return GET_BUFS_LIMIT(threads) times the size of one lzma_outbuf
+/// holding buf_size_max bytes, or UINT64_MAX if threads exceeds
+/// LZMA_THREADS_MAX or buf_size_max is too big.
+extern uint64_t
+lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads)
+{
+	// This is to ease integer overflow checking: We may allocate up to
+	// GET_BUFS_LIMIT(LZMA_THREADS_MAX) buffers and we need some extra
+	// memory for other data structures too (that's the /2).
+	//
+	// lzma_outq_prealloc_buf() will still accept bigger buffers than this.
+	const uint64_t limit
+			= UINT64_MAX / GET_BUFS_LIMIT(LZMA_THREADS_MAX) / 2;
+
+	if (threads > LZMA_THREADS_MAX || buf_size_max > limit)
+		return UINT64_MAX;
+
+	// Do the arithmetic in uint64_t. lzma_outq_outbuf_memusage() takes
+	// a size_t, so passing buf_size_max to it would silently truncate
+	// the value on targets where size_t is narrower than 64 bits.
+	// The limit check above guarantees that this cannot overflow.
+	const uint64_t one_buf
+			= (uint64_t)(sizeof(lzma_outbuf)) + buf_size_max;
+
+	return GET_BUFS_LIMIT(threads) * one_buf;
+}
+
+
+/// Detach the head buffer from the in-use list and push it onto the cache.
+///
+/// If the cache currently holds buffers whose allocated size differs from
+/// the detached buffer, the cache is emptied first so that all cached
+/// buffers keep having the same size.
+static void
+move_head_to_cache(lzma_outq *outq, const lzma_allocator *allocator)
+{
+	assert(outq->head != NULL);
+	assert(outq->tail != NULL);
+	assert(outq->bufs_in_use > 0);
+
+	lzma_outbuf *const detached = outq->head;
+
+	// Unlink it from the head...tail list.
+	outq->head = detached->next;
+	if (outq->head == NULL)
+		outq->tail = NULL;
+
+	// It no longer counts as being in use.
+	outq->mem_in_use -= lzma_outq_outbuf_memusage(detached->allocated);
+	--outq->bufs_in_use;
+
+	// Drop cached buffers of a different size before caching this one.
+	const bool size_mismatch = outq->cache != NULL
+			&& outq->cache->allocated != detached->allocated;
+	if (size_mismatch)
+		lzma_outq_clear_cache(outq, allocator);
+
+	detached->next = outq->cache;
+	outq->cache = detached;
+}
+
+
+/// Pop the first buffer off the cache list, update the allocation
+/// counters, and free the buffer. The cache must not be empty.
+static void
+free_one_cached_buffer(lzma_outq *outq, const lzma_allocator *allocator)
+{
+	assert(outq->cache != NULL);
+
+	lzma_outbuf *const victim = outq->cache;
+	outq->cache = victim->next;
+
+	outq->mem_allocated -= lzma_outq_outbuf_memusage(victim->allocated);
+	--outq->bufs_allocated;
+
+	lzma_free(victim, allocator);
+}
+
+
+/// Free cached buffers one at a time until the cache list is empty.
+extern void
+lzma_outq_clear_cache(lzma_outq *outq, const lzma_allocator *allocator)
+{
+	for (; outq->cache != NULL; )
+		free_one_cached_buffer(outq, allocator);
+}
+
+
+/// Free cached buffers but keep the last one in the list if its allocated
+/// size equals keep_size. Does nothing when the cache is empty.
+extern void
+lzma_outq_clear_cache2(lzma_outq *outq, const lzma_allocator *allocator,
+		size_t keep_size)
+{
+	if (outq->cache != NULL) {
+		// Shrink the list until a single buffer is left.
+		while (outq->cache->next != NULL)
+			free_one_cached_buffer(outq, allocator);
+
+		// The remaining one survives only if its size matches.
+		if (outq->cache->allocated != keep_size)
+			free_one_cached_buffer(outq, allocator);
+	}
+}
+
+
+/// (Re)initialize the output queue for the given number of threads.
+///
+/// Buffers still in the head...tail list are moved to the cache and cached
+/// buffers are freed while more buffers are allocated than the new limit
+/// allows.
+///
+/// \return LZMA_OK, or LZMA_OPTIONS_ERROR if threads is greater than
+/// LZMA_THREADS_MAX.
+extern lzma_ret
+lzma_outq_init(lzma_outq *outq, const lzma_allocator *allocator,
+		uint32_t threads)
+{
+	if (threads > LZMA_THREADS_MAX)
+		return LZMA_OPTIONS_ERROR;
+
+	const uint32_t new_limit = GET_BUFS_LIMIT(threads);
+
+	// Empty the head...tail list.
+	while (outq->head != NULL)
+		move_head_to_cache(outq, allocator);
+
+	// The new limit may be lower than the old one, in which case
+	// some cached buffers have to go.
+	while (outq->bufs_allocated > new_limit)
+		free_one_cached_buffer(outq, allocator);
+
+	outq->read_pos = 0;
+	outq->bufs_limit = new_limit;
+
+	return LZMA_OK;
+}
+
+
+/// Release the buffers of the queue: buffers in the head...tail list are
+/// moved to the cache and then the whole cache is cleared.
+extern void
+lzma_outq_end(lzma_outq *outq, const lzma_allocator *allocator)
+{
+	for (; outq->head != NULL; )
+		move_head_to_cache(outq, allocator);
+
+	lzma_outq_clear_cache(outq, allocator);
+}
+
+
+/// Make sure that the cache starts with a buffer of exactly "size" bytes.
+///
+/// If the first cached buffer already has that size, nothing is done.
+/// Otherwise the cache is cleared and a single new buffer is allocated.
+///
+/// \return LZMA_OK on success, LZMA_MEM_ERROR if size is too big to
+/// be combined with the buffer header or if allocation fails.
+extern lzma_ret
+lzma_outq_prealloc_buf(lzma_outq *outq, const lzma_allocator *allocator,
+		size_t size)
+{
+	// Caller must have checked it with lzma_outq_has_buf().
+	assert(outq->bufs_in_use < outq->bufs_limit);
+
+	// A suitable buffer in the cache means there is nothing to do.
+	const bool cache_fits = outq->cache != NULL
+			&& outq->cache->allocated == size;
+	if (cache_fits)
+		return LZMA_OK;
+
+	// sizeof(lzma_outbuf) + size must not wrap around.
+	if (size > SIZE_MAX - sizeof(lzma_outbuf))
+		return LZMA_MEM_ERROR;
+
+	const size_t alloc_size = lzma_outq_outbuf_memusage(size);
+
+	// Whatever is still cached has the wrong size.
+	lzma_outq_clear_cache(outq, allocator);
+
+	lzma_outbuf *const fresh = lzma_alloc(alloc_size, allocator);
+	outq->cache = fresh;
+	if (fresh == NULL)
+		return LZMA_MEM_ERROR;
+
+	fresh->allocated = size;
+	fresh->next = NULL;
+
+	outq->mem_allocated += alloc_size;
+	++outq->bufs_allocated;
+
+	return LZMA_OK;
+}
+
+
+/// Take the first buffer from the cache, append it to the tail of the
+/// in-use list, and reset its per-use fields.
+///
+/// \param worker Stored into the buffer's "worker" field as is
+///
+/// \return Pointer to the buffer that was moved into use
+extern lzma_outbuf *
+lzma_outq_get_buf(lzma_outq *outq, void *worker)
+{
+	// Caller must have used lzma_outq_prealloc_buf() to ensure these.
+	assert(outq->bufs_in_use < outq->bufs_limit);
+	assert(outq->bufs_in_use < outq->bufs_allocated);
+	assert(outq->cache != NULL);
+
+	// Pop from the cache.
+	lzma_outbuf *const taken = outq->cache;
+	outq->cache = taken->next;
+	taken->next = NULL;
+
+	// Append to the head...tail list.
+	if (outq->tail == NULL) {
+		assert(outq->head == NULL);
+		outq->head = taken;
+	} else {
+		assert(outq->head != NULL);
+		outq->tail->next = taken;
+	}
+
+	outq->tail = taken;
+
+	// Reset the fields that describe a single use of the buffer.
+	taken->uncompressed_size = 0;
+	taken->unpadded_size = 0;
+
+	taken->decoder_in_pos = 0;
+	taken->pos = 0;
+	taken->finish_ret = LZMA_STREAM_END;
+	taken->finished = false;
+	taken->worker = worker;
+
+	outq->mem_in_use += lzma_outq_outbuf_memusage(taken->allocated);
+	++outq->bufs_in_use;
+
+	return taken;
+}
+
+
+/// Tell if lzma_outq_read() has something to do: the head buffer exists
+/// and either has unread bytes or has been marked finished.
+extern bool
+lzma_outq_is_readable(const lzma_outq *outq)
+{
+	const lzma_outbuf *const first = outq->head;
+
+	return first != NULL
+			&& (outq->read_pos < first->pos || first->finished);
+}
+
+
+/// Copy available data from the head buffer into out[].
+///
+/// When the head buffer is finished and has been read completely, its
+/// size information is stored via the non-NULL size pointers, the buffer
+/// is moved to the cache, and the buffer's finish_ret is returned.
+///
+/// \return LZMA_OK if no buffer is in use or the head buffer isn't
+/// fully consumed yet; otherwise the head buffer's finish_ret.
+extern lzma_ret
+lzma_outq_read(lzma_outq *restrict outq,
+		const lzma_allocator *restrict allocator,
+		uint8_t *restrict out, size_t *restrict out_pos,
+		size_t out_size,
+		lzma_vli *restrict unpadded_size,
+		lzma_vli *restrict uncompressed_size)
+{
+	// Without any buffer in use there is nothing to read from.
+	if (outq->bufs_in_use == 0)
+		return LZMA_OK;
+
+	lzma_outbuf *const src = outq->head;
+
+	// Copy from the buffer to output.
+	//
+	// FIXME? In threaded decoder it may be bad to do this copy while
+	// the mutex is being held.
+	lzma_bufcpy(src->buf, &outq->read_pos, src->pos,
+			out, out_pos, out_size);
+
+	// More may still come from this buffer, or not all of it fit.
+	const bool fully_drained = src->finished
+			&& outq->read_pos >= src->pos;
+	if (!fully_drained)
+		return LZMA_OK;
+
+	// The buffer is done; pass on its size information.
+	if (uncompressed_size != NULL)
+		*uncompressed_size = src->uncompressed_size;
+
+	if (unpadded_size != NULL)
+		*unpadded_size = src->unpadded_size;
+
+	// Grab the return value before the buffer goes back to the cache.
+	const lzma_ret result = src->finish_ret;
+
+	move_head_to_cache(outq, allocator);
+	outq->read_pos = 0;
+
+	return result;
+}
+
+
+/// Call enable_partial_output() on the worker of the head buffer if the
+/// head buffer exists, isn't finished, and still has a worker pointer.
+/// The pointer is cleared afterwards so that the callback runs only once
+/// per buffer.
+extern void
+lzma_outq_enable_partial_output(lzma_outq *outq,
+		void (*enable_partial_output)(void *worker))
+{
+	lzma_outbuf *const first = outq->head;
+
+	if (first == NULL || first->finished || first->worker == NULL)
+		return;
+
+	enable_partial_output(first->worker);
+
+	// Calling it twice would be pointless, so forget the worker.
+	first->worker = NULL;
+}
diff --git a/src/liblzma/common/outqueue.h b/src/liblzma/common/outqueue.h
new file mode 100644
index 0000000..596911e
--- /dev/null
+++ b/src/liblzma/common/outqueue.h
@@ -0,0 +1,254 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file outqueue.h
+/// \brief Output queue handling in multithreaded coding
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+/// Output buffer for a single thread
+typedef struct lzma_outbuf_s lzma_outbuf;
+struct lzma_outbuf_s {
+ /// Pointer to the next buffer. This is used for the cached buffers.
+ /// The worker thread must not modify this.
+ lzma_outbuf *next;
+
+ /// This is initialized by lzma_outq_get_buf() and
+ /// is used by lzma_outq_enable_partial_output().
+ /// The worker thread must not modify this.
+ void *worker;
+
+ /// Amount of memory allocated for buf[].
+ /// The worker thread must not modify this.
+ size_t allocated;
+
+ /// Writing position in the worker thread or, in other words, the
+ /// amount of finished data written to buf[] which can be copied out.
+ ///
+ /// \note This is read by another thread and thus access
+ /// to this variable needs a mutex.
+ size_t pos;
+
+ /// Decompression: Position in the input buffer in the worker thread
+ /// that matches the output "pos" above. This is used to detect if
+ /// more output might be possible from the worker thread: if it has
+ /// consumed all its input, then more output isn't possible.
+ ///
+ /// \note This is read by another thread and thus access
+ /// to this variable needs a mutex.
+ size_t decoder_in_pos;
+
+ /// True when no more data will be written into this buffer.
+ ///
+ /// \note This is read by another thread and thus access
+ /// to this variable needs a mutex.
+ bool finished;
+
+ /// Return value for lzma_outq_read() when the last byte from
+ /// a finished buffer has been read. Defaults to LZMA_STREAM_END.
+ /// This must *not* be LZMA_OK. The idea is to allow a decoder to
+ /// pass an error code to the main thread, setting the code here
+ /// together with finished = true.
+ lzma_ret finish_ret;
+
+ /// Additional size information. lzma_outq_read() may read these
+ /// when "finished" is true.
+ lzma_vli unpadded_size;
+ lzma_vli uncompressed_size;
+
+ /// Buffer of "allocated" bytes
+ uint8_t buf[];
+};
+
+
+/// Output queue: a linked list of buffers that are in use plus a cache of
+/// allocated buffers that are waiting to be reused.
+typedef struct {
+ /// Linked list of buffers in use. The next output byte will be
+ /// read from the head and buffers for the next thread will be
+ /// appended to the tail. tail->next is always NULL.
+ lzma_outbuf *head;
+ lzma_outbuf *tail;
+
+ /// Number of bytes read from head->buf[] in lzma_outq_read()
+ size_t read_pos;
+
+ /// Linked list of allocated buffers that aren't currently used.
+ /// This way buffers of similar size can be reused and don't
+ /// need to be reallocated every time. For simplicity, all
+ /// cached buffers in the list have the same allocated size.
+ lzma_outbuf *cache;
+
+ /// Total amount of memory allocated for buffers
+ uint64_t mem_allocated;
+
+ /// Amount of memory used by the buffers that are in use in
+ /// the head...tail linked list.
+ uint64_t mem_in_use;
+
+ /// Number of buffers in use in the head...tail list. If and only if
+ /// this is zero, the pointers head and tail above are NULL.
+ uint32_t bufs_in_use;
+
+ /// Number of buffers allocated (in use + cached)
+ uint32_t bufs_allocated;
+
+ /// Maximum allowed number of allocated buffers
+ uint32_t bufs_limit;
+} lzma_outq;
+
+
+/**
+ * \brief Calculate the memory usage of an output queue
+ *
+ * \return Approximate memory usage in bytes or UINT64_MAX on error.
+ */
+extern uint64_t lzma_outq_memusage(uint64_t buf_size_max, uint32_t threads);
+
+
+/// \brief Initialize an output queue
+///
+/// \param outq Pointer to an output queue. Before calling
+/// this function the first time, *outq should
+/// have been zeroed with memzero() so that this
+/// function knows that there are no previous
+/// allocations to free.
+/// \param allocator Pointer to allocator or NULL
+/// \param threads Number of buffers that may be in use
+/// concurrently. Note that more than this number
+/// of buffers may actually get allocated to
+/// improve performance when buffers finish
+/// out of order. The actual maximum number of
+/// allocated buffers is derived from the number
+/// of threads.
+///
+/// \return - LZMA_OK
+/// - LZMA_OPTIONS_ERROR: threads is greater than LZMA_THREADS_MAX.
+///
+extern lzma_ret lzma_outq_init(lzma_outq *outq,
+ const lzma_allocator *allocator, uint32_t threads);
+
+
+/// \brief Free the memory associated with the output queue
+extern void lzma_outq_end(lzma_outq *outq, const lzma_allocator *allocator);
+
+
+/// \brief Free all cached buffers that consume memory but aren't in use
+extern void lzma_outq_clear_cache(
+ lzma_outq *outq, const lzma_allocator *allocator);
+
+
+/// \brief Like lzma_outq_clear_cache() but might keep one buffer
+///
+/// One buffer is not freed if its size is equal to keep_size.
+/// This is useful if the caller knows that it will soon need a buffer of
+/// keep_size bytes. This way it won't be freed and immediately reallocated.
+extern void lzma_outq_clear_cache2(
+ lzma_outq *outq, const lzma_allocator *allocator,
+ size_t keep_size);
+
+
+/// \brief Preallocate a new buffer into cache
+///
+/// Splitting the buffer allocation into a separate function makes it
+/// possible to ensure that lzma_outq_get_buf() cannot fail.
+/// If the preallocated buffer isn't actually used (for example, some
+/// other error occurs), the caller has to do nothing as the buffer will
+/// be used later or cleared from the cache when not needed.
+///
+/// \return LZMA_OK on success, LZMA_MEM_ERROR if allocation fails
+///
+extern lzma_ret lzma_outq_prealloc_buf(
+ lzma_outq *outq, const lzma_allocator *allocator, size_t size);
+
+
+/// \brief Get a new buffer
+///
+/// lzma_outq_prealloc_buf() must be used to ensure that there is a buffer
+/// available before calling lzma_outq_get_buf().
+///
+extern lzma_outbuf *lzma_outq_get_buf(lzma_outq *outq, void *worker);
+
+
+/// \brief Test if there is data ready to be read
+///
+/// Call to this function must be protected with the same mutex that
+/// is used to protect lzma_outbuf.finished.
+///
+extern bool lzma_outq_is_readable(const lzma_outq *outq);
+
+
+/// \brief Read finished data
+///
+/// \param outq Pointer to an output queue
+/// \param out Beginning of the output buffer
+/// \param out_pos The next byte will be written to
+/// out[*out_pos].
+/// \param out_size Size of the out buffer; the first byte into
+/// which no data is written to is out[out_size].
+/// \param unpadded_size Unpadded Size from the Block encoder
+/// \param uncompressed_size Uncompressed Size from the Block encoder
+///
+/// \return - LZMA_OK: All OK. Either no data was available or the buffer
+/// being read didn't become empty yet.
+/// - LZMA_STREAM_END (or another value stored in the buffer's
+/// finish_ret): The buffer being read was finished.
+/// *unpadded_size and *uncompressed_size were set if they
+/// were not NULL.
+///
+/// \note This reads lzma_outbuf.finished and .pos variables and thus
+/// calls to this function need to be protected with a mutex.
+///
+extern lzma_ret lzma_outq_read(lzma_outq *restrict outq,
+ const lzma_allocator *restrict allocator,
+ uint8_t *restrict out, size_t *restrict out_pos,
+ size_t out_size, lzma_vli *restrict unpadded_size,
+ lzma_vli *restrict uncompressed_size);
+
+
+/// \brief Enable partial output from a worker thread
+///
+/// If the buffer at the head of the output queue isn't finished,
+/// this will call enable_partial_output on the worker associated with
+/// that output buffer.
+///
+/// \note This reads a lzma_outbuf.finished variable and thus
+/// calls to this function need to be protected with a mutex.
+///
+extern void lzma_outq_enable_partial_output(lzma_outq *outq,
+ void (*enable_partial_output)(void *worker));
+
+
+/// \brief Test if one more buffer may be taken into use
+///
+/// True when fewer buffers are in use than the queue's limit allows.
+/// This must be used before getting a new buffer with lzma_outq_get_buf().
+///
+static inline bool
+lzma_outq_has_buf(const lzma_outq *outq)
+{
+	const uint32_t in_use = outq->bufs_in_use;
+	return in_use < outq->bufs_limit;
+}
+
+
+/// \brief Test if no buffers are in use in the queue
+static inline bool
+lzma_outq_is_empty(const lzma_outq *outq)
+{
+	const uint32_t in_use = outq->bufs_in_use;
+	return in_use == 0;
+}
+
+
+/// \brief Get the size of a lzma_outbuf with buf_size bytes of payload
+///
+/// \note The sum is asserted not to exceed SIZE_MAX, so the caller
+/// must make sure that buf_size is small enough!
+static inline uint64_t
+lzma_outq_outbuf_memusage(size_t buf_size)
+{
+	const size_t header_size = sizeof(lzma_outbuf);
+
+	assert(buf_size <= SIZE_MAX - header_size);
+	return header_size + buf_size;
+}
diff --git a/src/liblzma/common/stream_buffer_decoder.c b/src/liblzma/common/stream_buffer_decoder.c
new file mode 100644
index 0000000..b9745b5
--- /dev/null
+++ b/src/liblzma/common/stream_buffer_decoder.c
@@ -0,0 +1,91 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_buffer_decoder.c
+/// \brief Single-call .xz Stream decoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stream_decoder.h"
+
+
+/// Decode a .xz Stream from in[] into out[] with a single call.
+///
+/// \param memlimit Pointer to the memory usage limit. It is passed to
+/// the decoder's memconfig() if LZMA_MEMLIMIT_ERROR
+/// occurs. Must not be NULL.
+/// \param flags Decoder flags; LZMA_TELL_ANY_CHECK is not allowed.
+/// \param allocator Allocator passed to the decoder
+/// \param in,in_pos,in_size Input buffer, position, and size
+/// \param out,out_pos,out_size Output buffer, position, and size
+///
+/// On success *in_pos and *out_pos are left at the positions reached by
+/// the decoder. On any error they are restored to their original values.
+///
+/// \return - LZMA_OK: Decoding succeeded.
+/// - LZMA_PROG_ERROR: Invalid arguments or flags.
+/// - LZMA_DATA_ERROR: All input was consumed without reaching
+/// the end of the Stream (truncated input).
+/// - LZMA_BUF_ERROR: The decoder stopped before consuming all
+/// input (output buffer too small).
+/// - Other error codes from the Stream decoder.
+extern LZMA_API(lzma_ret)
+lzma_stream_buffer_decode(uint64_t *memlimit, uint32_t flags,
+		const lzma_allocator *allocator,
+		const uint8_t *in, size_t *in_pos, size_t in_size,
+		uint8_t *out, size_t *out_pos, size_t out_size)
+{
+	// Sanity checks. memlimit is dereferenced below, so it must be
+	// validated like the other pointers.
+	if (memlimit == NULL
+			|| in_pos == NULL || (in == NULL && *in_pos != in_size)
+			|| *in_pos > in_size || out_pos == NULL
+			|| (out == NULL && *out_pos != out_size)
+			|| *out_pos > out_size)
+		return LZMA_PROG_ERROR;
+
+	// Catch flags that are not allowed in buffer-to-buffer decoding.
+	if (flags & LZMA_TELL_ANY_CHECK)
+		return LZMA_PROG_ERROR;
+
+	// Initialize the Stream decoder.
+	// TODO: We need something to tell the decoder that it can use the
+	// output buffer as workspace, and thus save significant amount of RAM.
+	lzma_next_coder stream_decoder = LZMA_NEXT_CODER_INIT;
+	lzma_ret ret = lzma_stream_decoder_init(
+			&stream_decoder, allocator, *memlimit, flags);
+
+	if (ret == LZMA_OK) {
+		// Save the positions so that we can restore them in case
+		// an error occurs.
+		const size_t in_start = *in_pos;
+		const size_t out_start = *out_pos;
+
+		// Do the actual decoding.
+		ret = stream_decoder.code(stream_decoder.coder, allocator,
+				in, in_pos, in_size, out, out_pos, out_size,
+				LZMA_FINISH);
+
+		if (ret == LZMA_STREAM_END) {
+			ret = LZMA_OK;
+		} else {
+			// Something went wrong. Classify the failure using
+			// the positions reached by the decoder; they must
+			// not be restored before this, otherwise the checks
+			// below would look at the starting positions.
+			if (ret == LZMA_OK) {
+				// Either the input was truncated or the
+				// output buffer was too small.
+				assert(*in_pos == in_size
+						|| *out_pos == out_size);
+
+				// If all the input was consumed, then the
+				// input is truncated, even if the output
+				// buffer is also full. This is because
+				// processing the last byte of the Stream
+				// never produces output.
+				if (*in_pos == in_size)
+					ret = LZMA_DATA_ERROR;
+				else
+					ret = LZMA_BUF_ERROR;
+
+			} else if (ret == LZMA_MEMLIMIT_ERROR) {
+				// Let the caller know how much memory would
+				// have been needed.
+				uint64_t memusage;
+				(void)stream_decoder.memconfig(
+						stream_decoder.coder,
+						memlimit, &memusage, 0);
+			}
+
+			// Restore the positions.
+			*in_pos = in_start;
+			*out_pos = out_start;
+		}
+	}
+
+	// Free the decoder memory. This needs to be done even if
+	// initialization fails, because the internal API doesn't
+	// require the initialization function to free its memory on error.
+	lzma_next_end(&stream_decoder, allocator);
+
+	return ret;
+}
diff --git a/src/liblzma/common/stream_buffer_encoder.c b/src/liblzma/common/stream_buffer_encoder.c
new file mode 100644
index 0000000..7315759
--- /dev/null
+++ b/src/liblzma/common/stream_buffer_encoder.c
@@ -0,0 +1,142 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_buffer_encoder.c
+/// \brief Single-call .xz Stream encoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "index.h"
+
+
+/// Maximum size of Index that has exactly one Record.
+/// Index Indicator + Number of Records + Record + CRC32 rounded up to
+/// the next multiple of four. The rounding is done by adding 3 and
+/// clearing the two lowest bits.
+#define INDEX_BOUND ((1 + 1 + 2 * LZMA_VLI_BYTES_MAX + 4 + 3) & ~3)
+
+/// Stream Header, Stream Footer, and Index
+#define HEADERS_BOUND (2 * LZMA_STREAM_HEADER_SIZE + INDEX_BOUND)
+
+
+/// Calculate an upper bound for the size of a single-Block .xz Stream.
+///
+/// \return lzma_block_buffer_bound(uncompressed_size) plus
+/// HEADERS_BOUND, or 0 if the Block bound is 0 or if the sum
+/// would exceed the smaller of SIZE_MAX and LZMA_VLI_MAX.
+extern LZMA_API(size_t)
+lzma_stream_buffer_bound(size_t uncompressed_size)
+{
+	// Upper bound for the Block alone.
+	const size_t block_bound = lzma_block_buffer_bound(uncompressed_size);
+
+	// Zero means failure. Otherwise make sure that adding the headers
+	// neither overflows nor makes the Stream bigger than LZMA_VLI_MAX
+	// (theoretically possible on 64-bit systems).
+	if (block_bound == 0
+			|| my_min(SIZE_MAX, LZMA_VLI_MAX) - block_bound
+				< HEADERS_BOUND)
+		return 0;
+
+	return block_bound + HEADERS_BOUND;
+}
+
+
+/// Encode in[] as a complete .xz Stream into out[] with a single call.
+///
+/// The Stream consists of Stream Header, at most one Block (none if
+/// in_size is zero), Index, and Stream Footer. *out_pos_ptr is updated
+/// only if everything succeeds.
+///
+/// \return - LZMA_OK: Encoding succeeded.
+/// - LZMA_PROG_ERROR: Invalid arguments, or encoding the
+/// Stream Header or Stream Footer failed.
+/// - LZMA_UNSUPPORTED_CHECK: check isn't supported.
+/// - LZMA_BUF_ERROR: Not enough output space for the headers.
+/// - LZMA_MEM_ERROR: Allocating the Index failed.
+/// - Other error codes from the Block and Index encoders.
+extern LZMA_API(lzma_ret)
+lzma_stream_buffer_encode(lzma_filter *filters, lzma_check check,
+		const lzma_allocator *allocator,
+		const uint8_t *in, size_t in_size,
+		uint8_t *out, size_t *out_pos_ptr, size_t out_size)
+{
+	// Argument validation
+	if (filters == NULL || (unsigned int)(check) > LZMA_CHECK_ID_MAX)
+		return LZMA_PROG_ERROR;
+
+	if ((in == NULL && in_size != 0) || out == NULL)
+		return LZMA_PROG_ERROR;
+
+	if (out_pos_ptr == NULL || *out_pos_ptr > out_size)
+		return LZMA_PROG_ERROR;
+
+	if (!lzma_check_is_supported(check))
+		return LZMA_UNSUPPORTED_CHECK;
+
+	// Note for the paranoids: Index encoder prevents the Stream from
+	// getting too big and still being accepted with LZMA_OK, and Block
+	// encoder catches if the input is too big. So we don't need to
+	// separately check if the buffers are too big.
+
+	// Work on a local position; the caller's one is updated only at
+	// the very end.
+	size_t pos = *out_pos_ptr;
+
+	// Both Stream Header and Stream Footer have to fit.
+	if (out_size - pos <= 2 * LZMA_STREAM_HEADER_SIZE)
+		return LZMA_BUF_ERROR;
+
+	// Set the space of Stream Footer aside now so that no further
+	// space check is needed before encoding it.
+	out_size -= LZMA_STREAM_HEADER_SIZE;
+
+	// Stream Header
+	lzma_stream_flags stream_flags = {
+		.version = 0,
+		.check = check,
+	};
+
+	if (lzma_stream_header_encode(&stream_flags, out + pos) != LZMA_OK)
+		return LZMA_PROG_ERROR;
+
+	pos += LZMA_STREAM_HEADER_SIZE;
+
+	// Block, which is encoded only when there is some input.
+	lzma_block block = {
+		.version = 0,
+		.check = check,
+		.filters = filters,
+	};
+
+	const bool has_input = in_size > 0;
+
+	if (has_input)
+		return_if_error(lzma_block_buffer_encode(&block, allocator,
+				in, in_size, out, &pos, out_size));
+
+	// Index
+	{
+		// The Index gets one Record if a Block was encoded and
+		// stays empty otherwise.
+		lzma_index *idx = lzma_index_init(allocator);
+		if (idx == NULL)
+			return LZMA_MEM_ERROR;
+
+		lzma_ret ret = LZMA_OK;
+
+		if (has_input)
+			ret = lzma_index_append(idx, allocator,
+					lzma_block_unpadded_size(&block),
+					block.uncompressed_size);
+
+		// With the Record in place, encode the Index and store
+		// its size for Stream Footer.
+		if (ret == LZMA_OK) {
+			ret = lzma_index_buffer_encode(
+					idx, out, &pos, out_size);
+
+			stream_flags.backward_size = lzma_index_size(idx);
+		}
+
+		lzma_index_end(idx, allocator);
+
+		if (ret != LZMA_OK)
+			return ret;
+	}
+
+	// Stream Footer, for which the space was reserved earlier.
+	if (lzma_stream_footer_encode(&stream_flags, out + pos) != LZMA_OK)
+		return LZMA_PROG_ERROR;
+
+	pos += LZMA_STREAM_HEADER_SIZE;
+
+	// Success: publish the new output position.
+	*out_pos_ptr = pos;
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/stream_decoder.c b/src/liblzma/common/stream_decoder.c
new file mode 100644
index 0000000..6428381
--- /dev/null
+++ b/src/liblzma/common/stream_decoder.c
@@ -0,0 +1,474 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_decoder.c
+/// \brief Decodes .xz Streams
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stream_decoder.h"
+#include "block_decoder.h"
+#include "index.h"
+
+
+typedef struct {
+ enum { // Decoder state; stream_decode() switches on this.
+ SEQ_STREAM_HEADER,
+ SEQ_BLOCK_HEADER, // Also detects the Index Indicator byte.
+ SEQ_BLOCK_INIT, // Own state so init can be retried after LZMA_MEMLIMIT_ERROR.
+ SEQ_BLOCK_RUN,
+ SEQ_INDEX,
+ SEQ_STREAM_FOOTER,
+ SEQ_STREAM_PADDING, // Entered only when "concatenated" is true.
+ } sequence;
+
+ /// Block decoder
+ lzma_next_coder block_decoder;
+
+ /// Block options decoded by the Block Header decoder and used by
+ /// the Block decoder.
+ lzma_block block_options;
+
+ /// Stream Flags from Stream Header
+ lzma_stream_flags stream_flags;
+
+ /// Index is hashed so that it can be compared to the sizes of Blocks
+ /// with O(1) memory usage.
+ lzma_index_hash *index_hash;
+
+ /// Memory usage limit
+ uint64_t memlimit;
+
+ /// Amount of memory actually needed (only an estimate)
+ uint64_t memusage;
+
+ /// If true, LZMA_NO_CHECK is returned if the Stream has
+ /// no integrity check.
+ bool tell_no_check;
+
+ /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
+ /// an integrity check that isn't supported by this liblzma build.
+ bool tell_unsupported_check;
+
+ /// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
+ bool tell_any_check;
+
+ /// If true, we will tell the Block decoder to skip calculating
+ /// and verifying the integrity check.
+ bool ignore_check;
+
+ /// If true, we will decode concatenated Streams that possibly have
+ /// Stream Padding between or after them. LZMA_STREAM_END is returned
+ /// once the application isn't giving us any new input (LZMA_FINISH),
+ /// and we aren't in the middle of a Stream, and possible
+ /// Stream Padding is a multiple of four bytes.
+ bool concatenated;
+
+ /// When decoding concatenated Streams, this is true as long as we
+ /// are decoding the first Stream. This is needed to avoid misleading
+ /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
+ /// bytes.
+ bool first_stream;
+
+ /// Write position in buffer[]; in Stream Padding, the byte count modulo 4
+ size_t pos;
+
+ /// Buffer to hold Stream Header, Block Header, and Stream Footer.
+ /// Block Header has biggest maximum size.
+ uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
+} lzma_stream_coder;
+
+
+static lzma_ret // Prepares the coder to decode a new Stream; used at init and between Streams.
+stream_decoder_reset(lzma_stream_coder *coder, const lzma_allocator *allocator)
+{
+ // (Re)initialize the Index hash; the current pointer (NULL on first use) is passed in.
+ coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
+ if (coder->index_hash == NULL)
+ return LZMA_MEM_ERROR;
+
+ // Start from the Stream Header with nothing copied into buffer[] yet.
+ coder->sequence = SEQ_STREAM_HEADER;
+ coder->pos = 0;
+
+ return LZMA_OK;
+}
+
+
+static lzma_ret // Decoder state machine for .xz Streams; one case per SEQ_* state.
+stream_decode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_stream_coder *coder = coder_ptr;
+
+ // When decoding the actual Block, it may be able to produce more
+ // output even if we don't give it any new input.
+ while (true)
+ switch (coder->sequence) {
+ case SEQ_STREAM_HEADER: {
+ // Copy the Stream Header to the internal buffer.
+ lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+ LZMA_STREAM_HEADER_SIZE);
+
+ // Return if we didn't get the whole Stream Header yet.
+ if (coder->pos < LZMA_STREAM_HEADER_SIZE)
+ return LZMA_OK;
+
+ coder->pos = 0; // buffer[] will be reused for the next header.
+
+ // Decode the Stream Header.
+ const lzma_ret ret = lzma_stream_header_decode(
+ &coder->stream_flags, coder->buffer);
+ if (ret != LZMA_OK)
+ return ret == LZMA_FORMAT_ERROR && !coder->first_stream
+ ? LZMA_DATA_ERROR : ret;
+
+ // If we are decoding concatenated Streams, and the later
+ // Streams have invalid Header Magic Bytes, we give
+ // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
+ coder->first_stream = false;
+
+ // Copy the type of the Check so that Block Header and Block
+ // decoders see it.
+ coder->block_options.check = coder->stream_flags.check;
+
+ // Even if we return LZMA_*_CHECK below, we want
+ // to continue from Block Header decoding.
+ coder->sequence = SEQ_BLOCK_HEADER;
+
+ // Detect if there's no integrity check or if it is
+ // unsupported if those were requested by the application.
+ if (coder->tell_no_check && coder->stream_flags.check
+ == LZMA_CHECK_NONE)
+ return LZMA_NO_CHECK;
+
+ if (coder->tell_unsupported_check
+ && !lzma_check_is_supported(
+ coder->stream_flags.check))
+ return LZMA_UNSUPPORTED_CHECK;
+
+ if (coder->tell_any_check)
+ return LZMA_GET_CHECK;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_HEADER: {
+ if (*in_pos >= in_size) // The first byte is needed to tell a Block Header from the Index.
+ return LZMA_OK;
+
+ if (coder->pos == 0) {
+ // Detect if it's Index.
+ if (in[*in_pos] == INDEX_INDICATOR) {
+ coder->sequence = SEQ_INDEX;
+ break; // Re-dispatch to SEQ_INDEX via the outer loop.
+ }
+
+ // Calculate the size of the Block Header. Note that
+ // Block Header decoder wants to see this byte too
+ // so don't advance *in_pos.
+ coder->block_options.header_size
+ = lzma_block_header_size_decode(
+ in[*in_pos]);
+ }
+
+ // Copy the Block Header to the internal buffer.
+ lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+ coder->block_options.header_size);
+
+ // Return if we didn't get the whole Block Header yet.
+ if (coder->pos < coder->block_options.header_size)
+ return LZMA_OK;
+
+ coder->pos = 0;
+ coder->sequence = SEQ_BLOCK_INIT;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_INIT: {
+ // Checking memusage and doing the initialization needs
+ // its own sequence point because we need to be able to
+ // retry if we return LZMA_MEMLIMIT_ERROR.
+
+ // Version 1 is needed to support the .ignore_check option.
+ coder->block_options.version = 1;
+
+ // Set up a buffer to hold the filter chain. Block Header
+ // decoder will initialize all members of this array so
+ // we don't need to do it here.
+ lzma_filter filters[LZMA_FILTERS_MAX + 1];
+ coder->block_options.filters = filters;
+
+ // Decode the Block Header.
+ return_if_error(lzma_block_header_decode(&coder->block_options,
+ allocator, coder->buffer));
+
+ // If LZMA_IGNORE_CHECK was used, this flag needs to be set.
+ // It has to be set after lzma_block_header_decode() because
+ // it always resets this to false.
+ coder->block_options.ignore_check = coder->ignore_check;
+
+ // Check the memory usage limit.
+ const uint64_t memusage = lzma_raw_decoder_memusage(filters);
+ lzma_ret ret;
+
+ if (memusage == UINT64_MAX) {
+ // One or more unknown Filter IDs.
+ ret = LZMA_OPTIONS_ERROR;
+ } else {
+ // Now we can set coder->memusage since we know that
+ // the filter chain is valid. We don't want
+ // lzma_memusage() to return UINT64_MAX in case of
+ // invalid filter chain.
+ coder->memusage = memusage;
+
+ if (memusage > coder->memlimit) {
+ // The chain would need too much memory.
+ ret = LZMA_MEMLIMIT_ERROR;
+ } else {
+ // Memory usage is OK.
+ // Initialize the Block decoder.
+ ret = lzma_block_decoder_init(
+ &coder->block_decoder,
+ allocator,
+ &coder->block_options);
+ }
+ }
+
+ // Free the allocated filter options since they are needed
+ // only to initialize the Block decoder.
+ lzma_filters_free(filters, allocator);
+ coder->block_options.filters = NULL; // filters[] is a local array; don't keep a pointer to it.
+
+ // Check if memory usage calculation and Block decoder
+ // initialization succeeded.
+ if (ret != LZMA_OK)
+ return ret; // The sequence stays at SEQ_BLOCK_INIT, so this step runs again on the next call.
+
+ coder->sequence = SEQ_BLOCK_RUN;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_RUN: {
+ const lzma_ret ret = coder->block_decoder.code(
+ coder->block_decoder.coder, allocator,
+ in, in_pos, in_size, out, out_pos, out_size,
+ action);
+
+ if (ret != LZMA_STREAM_END)
+ return ret; // Anything but LZMA_STREAM_END is passed to the caller as is.
+
+ // Block decoded successfully. Add the new size pair to
+ // the Index hash.
+ return_if_error(lzma_index_hash_append(coder->index_hash,
+ lzma_block_unpadded_size(
+ &coder->block_options),
+ coder->block_options.uncompressed_size));
+
+ coder->sequence = SEQ_BLOCK_HEADER;
+ break; // Next is either another Block Header or the Index.
+ }
+
+ case SEQ_INDEX: {
+ // If we don't have any input, don't call
+ // lzma_index_hash_decode() since it would return
+ // LZMA_BUF_ERROR, which we must not do here.
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ // Decode the Index and compare it to the hash calculated
+ // from the sizes of the Blocks (if any).
+ const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
+ in, in_pos, in_size);
+ if (ret != LZMA_STREAM_END)
+ return ret;
+
+ coder->sequence = SEQ_STREAM_FOOTER;
+ }
+
+ // Fall through
+
+ case SEQ_STREAM_FOOTER: {
+ // Copy the Stream Footer to the internal buffer.
+ lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+ LZMA_STREAM_HEADER_SIZE);
+
+ // Return if we didn't get the whole Stream Footer yet.
+ if (coder->pos < LZMA_STREAM_HEADER_SIZE)
+ return LZMA_OK;
+
+ coder->pos = 0; // From here on pos counts Stream Padding bytes (mod 4).
+
+ // Decode the Stream Footer. The decoder gives
+ // LZMA_FORMAT_ERROR if the magic bytes don't match,
+ // so convert that return code to LZMA_DATA_ERROR.
+ lzma_stream_flags footer_flags;
+ const lzma_ret ret = lzma_stream_footer_decode(
+ &footer_flags, coder->buffer);
+ if (ret != LZMA_OK)
+ return ret == LZMA_FORMAT_ERROR
+ ? LZMA_DATA_ERROR : ret;
+
+ // Check that Index Size stored in the Stream Footer matches
+ // the real size of the Index field.
+ if (lzma_index_hash_size(coder->index_hash)
+ != footer_flags.backward_size)
+ return LZMA_DATA_ERROR;
+
+ // Compare that the Stream Flags fields are identical in
+ // both Stream Header and Stream Footer.
+ return_if_error(lzma_stream_flags_compare(
+ &coder->stream_flags, &footer_flags));
+
+ if (!coder->concatenated)
+ return LZMA_STREAM_END;
+
+ coder->sequence = SEQ_STREAM_PADDING;
+ }
+
+ // Fall through
+
+ case SEQ_STREAM_PADDING:
+ assert(coder->concatenated);
+
+ // Skip over possible Stream Padding.
+ while (true) {
+ if (*in_pos >= in_size) {
+ // Unless LZMA_FINISH was used, we cannot
+ // know if there's more input coming later.
+ if (action != LZMA_FINISH)
+ return LZMA_OK;
+
+ // Stream Padding must be a multiple of
+ // four bytes.
+ return coder->pos == 0
+ ? LZMA_STREAM_END
+ : LZMA_DATA_ERROR;
+ }
+
+ // If the byte is not zero, it probably indicates
+ // beginning of a new Stream (or the file is corrupt).
+ if (in[*in_pos] != 0x00)
+ break;
+
+ ++*in_pos;
+ coder->pos = (coder->pos + 1) & 3;
+ }
+
+ // Stream Padding must be a multiple of four bytes (empty
+ // Stream Padding is OK).
+ if (coder->pos != 0) {
+ ++*in_pos; // NOTE(review): consumes the offending non-zero byte before failing; rationale not visible here.
+ return LZMA_DATA_ERROR;
+ }
+
+ // Prepare to decode the next Stream.
+ return_if_error(stream_decoder_reset(coder, allocator));
+ break; // Back to SEQ_STREAM_HEADER for the next Stream.
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ // Never reached
+}
+
+
+static void // Releases everything owned by the coder, including the coder itself.
+stream_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_stream_coder *coder = coder_ptr;
+ lzma_next_end(&coder->block_decoder, allocator); // End the nested Block decoder.
+ lzma_index_hash_end(coder->index_hash, allocator); // Free the Index hash.
+ lzma_free(coder, allocator); // Free the coder structure last.
+ return;
+}
+
+
+static lzma_check // Returns the Check type stored in the Stream Flags of the coder.
+stream_decoder_get_check(const void *coder_ptr)
+{
+ const lzma_stream_coder *coder = coder_ptr;
+ return coder->stream_flags.check; // Filled in by lzma_stream_header_decode() in stream_decode().
+}
+
+
+static lzma_ret // Reports memory usage and the current limit; optionally sets a new limit.
+stream_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
+ uint64_t *old_memlimit, uint64_t new_memlimit)
+{
+ lzma_stream_coder *coder = coder_ptr;
+
+ *memusage = coder->memusage; // The outputs are written even if the new limit is rejected below.
+ *old_memlimit = coder->memlimit;
+
+ if (new_memlimit != 0) { // Zero means "query only"; the limit is left unchanged.
+ if (new_memlimit < coder->memusage)
+ return LZMA_MEMLIMIT_ERROR; // The new limit is below the current usage; keep the old limit.
+
+ coder->memlimit = new_memlimit;
+ }
+
+ return LZMA_OK;
+}
+
+
+extern lzma_ret // Sets up "next" as a single-threaded .xz Stream decoder.
+lzma_stream_decoder_init(
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t memlimit, uint32_t flags)
+{
+ lzma_next_coder_init(&lzma_stream_decoder_init, next, allocator);
+
+ if (flags & ~LZMA_SUPPORTED_FLAGS) // Reject flag bits outside LZMA_SUPPORTED_FLAGS.
+ return LZMA_OPTIONS_ERROR;
+
+ lzma_stream_coder *coder = next->coder;
+ if (coder == NULL) { // No coder yet: allocate one and install the callbacks.
+ coder = lzma_alloc(sizeof(lzma_stream_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &stream_decode;
+ next->end = &stream_decoder_end;
+ next->get_check = &stream_decoder_get_check;
+ next->memconfig = &stream_decoder_memconfig;
+
+ coder->block_decoder = LZMA_NEXT_CODER_INIT;
+ coder->index_hash = NULL; // stream_decoder_reset() passes this to lzma_index_hash_init().
+ }
+
+ coder->memlimit = my_max(1, memlimit); // A limit of 0 is stored as 1.
+ coder->memusage = LZMA_MEMUSAGE_BASE;
+ coder->tell_no_check = (flags & LZMA_TELL_NO_CHECK) != 0;
+ coder->tell_unsupported_check
+ = (flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
+ coder->tell_any_check = (flags & LZMA_TELL_ANY_CHECK) != 0;
+ coder->ignore_check = (flags & LZMA_IGNORE_CHECK) != 0;
+ coder->concatenated = (flags & LZMA_CONCATENATED) != 0;
+ coder->first_stream = true;
+
+ return stream_decoder_reset(coder, allocator); // Sets up the Index hash and the initial state.
+}
+
+
+extern LZMA_API(lzma_ret) // Public entry point: initializes strm as a .xz Stream decoder.
+lzma_stream_decoder(lzma_stream *strm, uint64_t memlimit, uint32_t flags)
+{
+ lzma_next_strm_init(lzma_stream_decoder_init, strm, memlimit, flags); // Macro defined elsewhere; presumably returns on failure - TODO confirm.
+
+ strm->internal->supported_actions[LZMA_RUN] = true; // Mark LZMA_RUN and LZMA_FINISH as supported actions.
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/stream_decoder.h b/src/liblzma/common/stream_decoder.h
new file mode 100644
index 0000000..c13c6ba
--- /dev/null
+++ b/src/liblzma/common/stream_decoder.h
@@ -0,0 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_decoder.h
+/// \brief Decodes .xz Streams
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_STREAM_DECODER_H
+#define LZMA_STREAM_DECODER_H
+
+#include "common.h"
+
+extern lzma_ret lzma_stream_decoder_init( // Implemented in stream_decoder.c.
+ lzma_next_coder *next, const lzma_allocator *allocator,
+ uint64_t memlimit, uint32_t flags);
+
+#endif
diff --git a/src/liblzma/common/stream_decoder_mt.c b/src/liblzma/common/stream_decoder_mt.c
new file mode 100644
index 0000000..76212b4
--- /dev/null
+++ b/src/liblzma/common/stream_decoder_mt.c
@@ -0,0 +1,2018 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_decoder_mt.c
+/// \brief Multithreaded .xz Stream decoder
+//
+// Authors: Sebastian Andrzej Siewior
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+#include "block_decoder.h"
+#include "stream_decoder.h"
+#include "index.h"
+#include "outqueue.h"
+
+
+typedef enum {
+ /// Waiting for work.
+ /// Main thread may change this to THR_RUN or THR_EXIT.
+ THR_IDLE,
+
+ /// Decoding is in progress.
+ /// Main thread may change this to THR_STOP or THR_EXIT.
+ /// The worker thread may change this to THR_IDLE.
+ THR_RUN,
+
+ /// The main thread wants the thread to stop whatever it was doing
+ /// but not exit. Main thread may change this to THR_EXIT.
+ /// The worker thread may change this to THR_IDLE.
+ THR_STOP,
+
+ /// The main thread wants the thread to exit (set in threads_end()).
+ THR_EXIT,
+
+} worker_state;
+
+
+typedef enum {
+ /// Partial updates (storing of worker thread progress
+ /// to lzma_outbuf) are disabled.
+ PARTIAL_DISABLED,
+
+ /// Main thread requests partial updates to be enabled but
+ /// no partial update has been done by the worker thread yet.
+ ///
+ /// Changing from PARTIAL_DISABLED to PARTIAL_START requires
+ /// use of the worker-thread mutex. Other transitions don't
+ /// need a mutex. See worker_enable_partial_update().
+ PARTIAL_START,
+
+ /// Partial updates are enabled and the worker thread has done
+ /// at least one partial update.
+ PARTIAL_ENABLED,
+
+} partial_update_mode;
+
+
+struct worker_thread {
+ /// Worker state is protected with our mutex.
+ worker_state state;
+
+ /// Input buffer that will contain the whole Block except Block Header.
+ uint8_t *in;
+
+ /// Amount of memory allocated for "in"
+ size_t in_size;
+
+ /// Number of bytes written to "in" by the main thread
+ size_t in_filled;
+
+ /// Number of bytes consumed from "in" by the worker thread.
+ size_t in_pos;
+
+ /// Amount of uncompressed data that has been decoded. This local
+ /// copy is needed because updating outbuf->pos requires locking
+ /// the main mutex (coder->mutex).
+ size_t out_pos;
+
+ /// Pointer to the main structure is needed to (1) lock the main
+ /// mutex (coder->mutex) when updating outbuf->pos and (2) when
+ /// putting this thread back to the stack of free threads.
+ struct lzma_stream_coder *coder;
+
+ /// The allocator is set by the main thread. Since a copy of the
+ /// pointer is kept here, the application must not change the
+ /// allocator before calling lzma_end().
+ const lzma_allocator *allocator;
+
+ /// Output queue buffer to which the uncompressed data is written.
+ lzma_outbuf *outbuf;
+
+ /// Amount of compressed data that has already been decompressed.
+ /// This is updated from in_pos when our mutex is locked.
+ /// This is size_t, not uint64_t, because per-thread progress
+ /// is limited to sizes of allocated buffers.
+ size_t progress_in;
+
+ /// Like progress_in but for uncompressed data.
+ size_t progress_out;
+
+ /// Updating outbuf->pos requires locking the main mutex
+ /// (coder->mutex). Since the main thread will only read output
+ /// from the oldest outbuf in the queue, only the worker thread
+ /// that is associated with the oldest outbuf needs to update its
+ /// outbuf->pos. This avoids useless mutex contention that would
+ /// happen if all worker threads were frequently locking the main
+ /// mutex to update their outbuf->pos.
+ ///
+ /// Only when partial_update is something else than PARTIAL_DISABLED,
+ /// this worker thread will update outbuf->pos after each call to
+ /// the Block decoder.
+ partial_update_mode partial_update;
+
+ /// Block decoder
+ lzma_next_coder block_decoder;
+
+ /// Thread-specific Block options are needed because the Block
+ /// decoder modifies the struct given to it at initialization.
+ lzma_block block_options;
+
+ /// Filter chain memory usage
+ uint64_t mem_filters;
+
+ /// Next structure in the stack of free worker threads.
+ struct worker_thread *next;
+
+ mythread_mutex mutex; // Protects "state"; worker_decoder() also reads in_filled and partial_update under it.
+ mythread_cond cond; // The worker waits on this; signaled by threads_end(), threads_stop(), worker_enable_partial_update().
+
+ /// The ID of this thread is used to join the thread
+ /// when it's not needed anymore.
+ mythread thread_id;
+};
+
+
+struct lzma_stream_coder {
+ enum {
+ SEQ_STREAM_HEADER,
+ SEQ_BLOCK_HEADER,
+ SEQ_BLOCK_INIT,
+ SEQ_BLOCK_THR_INIT,
+ SEQ_BLOCK_THR_RUN,
+ SEQ_BLOCK_DIRECT_INIT,
+ SEQ_BLOCK_DIRECT_RUN,
+ SEQ_INDEX_WAIT_OUTPUT,
+ SEQ_INDEX_DECODE,
+ SEQ_STREAM_FOOTER,
+ SEQ_STREAM_PADDING,
+ SEQ_ERROR,
+ } sequence;
+
+ /// Block decoder
+ lzma_next_coder block_decoder;
+
+ /// Every Block Header will be decoded into this structure.
+ /// This is also used to initialize a Block decoder when in
+ /// direct mode. In threaded mode, a thread-specific copy will
+ /// be made for decoder initialization because the Block decoder
+ /// will modify the structure given to it.
+ lzma_block block_options;
+
+ /// Buffer to hold a filter chain for Block Header decoding and
+ /// initialization. These are freed after successful Block decoder
+ /// initialization or at stream_decoder_mt_end(). The thread-specific
+ /// copy of block_options won't hold a pointer to filters[] after
+ /// initialization.
+ lzma_filter filters[LZMA_FILTERS_MAX + 1];
+
+ /// Stream Flags from Stream Header
+ lzma_stream_flags stream_flags;
+
+ /// Index is hashed so that it can be compared to the sizes of Blocks
+ /// with O(1) memory usage.
+ lzma_index_hash *index_hash;
+
+
+ /// Maximum wait time if cannot use all the input and cannot
+ /// fill the output buffer. This is in milliseconds.
+ uint32_t timeout;
+
+
+ /// Error code from a worker thread.
+ ///
+ /// \note Use mutex.
+ lzma_ret thread_error;
+
+ /// Error code to return after pending output has been copied out. If
+ /// set in read_output_and_wait(), this is a mirror of thread_error.
+ /// If set in stream_decode_mt() then it's, for example, error that
+ /// occurred when decoding Block Header.
+ lzma_ret pending_error;
+
+ /// Number of threads that will be created at maximum.
+ uint32_t threads_max;
+
+ /// Number of thread structures that have been initialized from
+ /// "threads", and thus the number of worker threads actually
+ /// created so far.
+ uint32_t threads_initialized;
+
+ /// Array of allocated thread-specific structures. When no threads
+ /// are in use (direct mode) this is NULL. In threaded mode this
+ /// points to an array of threads_max number of worker_thread structs.
+ struct worker_thread *threads;
+
+ /// Stack of free threads. When a thread finishes, it puts itself
+ /// back into this stack. This starts as empty because threads
+ /// are created only when actually needed.
+ ///
+ /// \note Use mutex.
+ struct worker_thread *threads_free;
+
+ /// The most recent worker thread to which the main thread writes
+ /// the new input from the application.
+ struct worker_thread *thr;
+
+ /// Output buffer queue for decompressed data from the worker threads
+ ///
+ /// \note Use mutex with operations that need it.
+ lzma_outq outq;
+
+ mythread_mutex mutex; // The "main mutex": protects the members marked with "\note Use mutex."
+ mythread_cond cond; // Signaled by worker threads; see worker_stop() and worker_decoder().
+
+
+ /// Memory usage that will not be exceeded in multi-threaded mode.
+ /// Single-threaded mode can exceed this even by a large amount.
+ uint64_t memlimit_threading;
+
+ /// Memory usage limit that should never be exceeded.
+ /// LZMA_MEMLIMIT_ERROR will be returned if decoding isn't possible
+ /// even in single-threaded mode without exceeding this limit.
+ uint64_t memlimit_stop;
+
+ /// Amount of memory in use by the direct mode decoder
+ /// (coder->block_decoder). In threaded mode this is 0.
+ uint64_t mem_direct_mode;
+
+ /// Amount of memory needed by the running worker threads.
+ /// This doesn't include the memory needed by the output buffer.
+ ///
+ /// \note Use mutex.
+ uint64_t mem_in_use;
+
+ /// Amount of memory used by the idle (cached) threads.
+ ///
+ /// \note Use mutex.
+ uint64_t mem_cached;
+
+
+ /// Amount of memory needed for the filter chain of the next Block.
+ uint64_t mem_next_filters;
+
+ /// Amount of memory needed for the thread-specific input buffer
+ /// for the next Block.
+ uint64_t mem_next_in;
+
+ /// Amount of memory actually needed to decode the next Block
+ /// in threaded mode. This is
+ /// mem_next_filters + mem_next_in + memory needed for lzma_outbuf.
+ uint64_t mem_next_block;
+
+
+ /// Amount of compressed data in Stream Header + Blocks that have
+ /// already been finished.
+ ///
+ /// \note Use mutex.
+ uint64_t progress_in;
+
+ /// Amount of uncompressed data in Blocks that have already
+ /// been finished.
+ ///
+ /// \note Use mutex.
+ uint64_t progress_out;
+
+
+ /// If true, LZMA_NO_CHECK is returned if the Stream has
+ /// no integrity check.
+ bool tell_no_check;
+
+ /// If true, LZMA_UNSUPPORTED_CHECK is returned if the Stream has
+ /// an integrity check that isn't supported by this liblzma build.
+ bool tell_unsupported_check;
+
+ /// If true, LZMA_GET_CHECK is returned after decoding Stream Header.
+ bool tell_any_check;
+
+ /// If true, we will tell the Block decoder to skip calculating
+ /// and verifying the integrity check.
+ bool ignore_check;
+
+ /// If true, we will decode concatenated Streams that possibly have
+ /// Stream Padding between or after them. LZMA_STREAM_END is returned
+ /// once the application isn't giving us any new input (LZMA_FINISH),
+ /// and we aren't in the middle of a Stream, and possible
+ /// Stream Padding is a multiple of four bytes.
+ bool concatenated;
+
+ /// If true, we will return any errors immediately instead of first
+ /// producing all output before the location of the error.
+ bool fail_fast;
+
+
+ /// When decoding concatenated Streams, this is true as long as we
+ /// are decoding the first Stream. This is needed to avoid misleading
+ /// LZMA_FORMAT_ERROR in case the later Streams don't have valid magic
+ /// bytes.
+ bool first_stream;
+
+ /// This is used to track if the previous call to stream_decode_mt()
+ /// had output space (*out_pos < out_size) and managed to fill the
+ /// output buffer (*out_pos == out_size). This may be set to true
+ /// in read_output_and_wait(). This is read and then reset to false
+ /// at the beginning of stream_decode_mt().
+ ///
+ /// This is needed to support applications that call lzma_code() in
+ /// such a way that more input is provided only when lzma_code()
+ /// didn't fill the output buffer completely. Basically, this makes
+ /// it easier to convert such applications from single-threaded
+ /// decoder to multi-threaded decoder.
+ bool out_was_filled;
+
+ /// Write position in buffer[] and position in Stream Padding
+ size_t pos;
+
+ /// Buffer to hold Stream Header, Block Header, and Stream Footer.
+ /// Block Header has biggest maximum size.
+ uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
+};
+
+
+/// Enables updating of outbuf->pos. This is a callback function that is
+/// used with lzma_outq_enable_partial_output(); thr_ptr is a struct worker_thread.
+static void
+worker_enable_partial_update(void *thr_ptr)
+{
+ struct worker_thread *thr = thr_ptr;
+
+ mythread_sync(thr->mutex) { // PARTIAL_DISABLED -> PARTIAL_START must be done under thr->mutex.
+ thr->partial_update = PARTIAL_START;
+ mythread_cond_signal(&thr->cond); // Wake the worker in case it is waiting for new input.
+ }
+}
+
+
+/// Things to do at THR_STOP or when finishing a Block.
+/// This is called with thr->coder->mutex locked (see the callers in worker_decoder()).
+static void
+worker_stop(struct worker_thread *thr)
+{
+ // Update memory usage counters.
+ thr->coder->mem_in_use -= thr->in_size;
+ thr->in_size = 0; // When finishing a Block the caller already freed thr->in; not so on the THR_STOP path - TODO confirm where.
+
+ thr->coder->mem_in_use -= thr->mem_filters; // The filter chain memory moves from "in use" to "cached".
+ thr->coder->mem_cached += thr->mem_filters;
+
+ // Put this thread to the stack of free threads.
+ thr->next = thr->coder->threads_free;
+ thr->coder->threads_free = thr;
+
+ mythread_cond_signal(&thr->coder->cond); // Signal the main coder's condition variable.
+ return;
+}
+
+
+static MYTHREAD_RET_TYPE // Thread entry function; started by mythread_create() in initialize_new_thread().
+worker_decoder(void *thr_ptr)
+{
+ struct worker_thread *thr = thr_ptr;
+ size_t in_filled;
+ partial_update_mode partial_update;
+ lzma_ret ret;
+
+next_loop_lock: // Jump here when thr->mutex is not held.
+
+ mythread_mutex_lock(&thr->mutex);
+next_loop_unlocked: // Reached with thr->mutex held, despite the name.
+
+ if (thr->state == THR_IDLE) {
+ mythread_cond_wait(&thr->cond, &thr->mutex);
+ goto next_loop_unlocked;
+ }
+
+ if (thr->state == THR_EXIT) {
+ mythread_mutex_unlock(&thr->mutex);
+
+ lzma_free(thr->in, thr->allocator);
+ lzma_next_end(&thr->block_decoder, thr->allocator);
+
+ mythread_mutex_destroy(&thr->mutex); // The worker destroys its own mutex and cond before returning.
+ mythread_cond_destroy(&thr->cond);
+
+ return MYTHREAD_RET_VALUE;
+ }
+
+ if (thr->state == THR_STOP) {
+ thr->state = THR_IDLE;
+ mythread_mutex_unlock(&thr->mutex);
+
+ mythread_sync(thr->coder->mutex) { // worker_stop() updates coder members that need the main mutex.
+ worker_stop(thr);
+ }
+
+ goto next_loop_lock;
+ }
+
+ assert(thr->state == THR_RUN);
+
+ // Update progress info for get_progress().
+ thr->progress_in = thr->in_pos;
+ thr->progress_out = thr->out_pos;
+
+ // If we don't have any new input, wait for a signal from the main
+ // thread except if partial output has just been enabled. In that
+ // case we will do one normal run so that the partial output info
+ // gets passed to the main thread. The call to block_decoder.code()
+ // is useless but harmless as it can occur only once per Block.
+ in_filled = thr->in_filled; // Snapshots taken while thr->mutex is held.
+ partial_update = thr->partial_update;
+
+ if (in_filled == thr->in_pos && partial_update != PARTIAL_START) {
+ mythread_cond_wait(&thr->cond, &thr->mutex);
+ goto next_loop_unlocked;
+ }
+
+ mythread_mutex_unlock(&thr->mutex);
+
+ // Pass the input in small chunks to the Block decoder.
+ // This way we react reasonably fast if we are told to stop/exit,
+ // and (when partial update is enabled) we tell about our progress
+ // to the main thread frequently enough.
+ const size_t chunk_size = 16384;
+ if ((in_filled - thr->in_pos) > chunk_size)
+ in_filled = thr->in_pos + chunk_size; // Only the local copy is clamped; thr->in_filled is untouched.
+
+ ret = thr->block_decoder.code(
+ thr->block_decoder.coder, thr->allocator,
+ thr->in, &thr->in_pos, in_filled,
+ thr->outbuf->buf, &thr->out_pos,
+ thr->outbuf->allocated, LZMA_RUN);
+
+ if (ret == LZMA_OK) {
+ if (partial_update != PARTIAL_DISABLED) {
+ // The main thread uses thr->mutex to change from
+ // PARTIAL_DISABLED to PARTIAL_START. The main thread
+ // doesn't care about this variable after that so we
+ // can safely change it here to PARTIAL_ENABLED
+ // without a mutex.
+ thr->partial_update = PARTIAL_ENABLED;
+
+ // The main thread is reading decompressed data
+ // from thr->outbuf. Tell the main thread about
+ // our progress.
+ //
+ // NOTE: It's possible that we consumed input without
+ // producing any new output so it's possible that
+ // only in_pos has changed. In case of PARTIAL_START
+ // it is possible that neither in_pos nor out_pos has
+ // changed.
+ mythread_sync(thr->coder->mutex) {
+ thr->outbuf->pos = thr->out_pos;
+ thr->outbuf->decoder_in_pos = thr->in_pos;
+ mythread_cond_signal(&thr->coder->cond);
+ }
+ }
+
+ goto next_loop_lock; // Re-check the state and look for more input.
+ }
+
+ // Either we finished successfully (LZMA_STREAM_END) or an error
+ // occurred. Both cases are handled almost identically. The error
+ // case requires updating thr->coder->thread_error.
+ //
+ // The sizes are in the Block Header and the Block decoder
+ // checks that they match, thus we know these:
+ assert(ret != LZMA_STREAM_END || thr->in_pos == thr->in_size);
+ assert(ret != LZMA_STREAM_END
+ || thr->out_pos == thr->block_options.uncompressed_size);
+
+ // Free the input buffer. Don't update in_size as we need
+ // it later to update thr->coder->mem_in_use.
+ lzma_free(thr->in, thr->allocator);
+ thr->in = NULL;
+
+ mythread_sync(thr->mutex) {
+ if (thr->state != THR_EXIT) // Don't overwrite a pending exit request.
+ thr->state = THR_IDLE;
+ }
+
+ mythread_sync(thr->coder->mutex) {
+ // Move our progress info to the main thread.
+ thr->coder->progress_in += thr->in_pos;
+ thr->coder->progress_out += thr->out_pos;
+ thr->progress_in = 0;
+ thr->progress_out = 0;
+
+ // Mark the outbuf as finished.
+ thr->outbuf->pos = thr->out_pos;
+ thr->outbuf->decoder_in_pos = thr->in_pos;
+ thr->outbuf->finished = true;
+ thr->outbuf->finish_ret = ret;
+ thr->outbuf = NULL;
+
+ // If an error occurred, tell it to the main thread.
+ if (ret != LZMA_STREAM_END
+ && thr->coder->thread_error == LZMA_OK) // Only the first error is recorded.
+ thr->coder->thread_error = ret;
+
+ worker_stop(thr);
+ }
+
+ goto next_loop_lock;
+}
+
+
+/// Tells the worker threads to exit, waits for them to terminate, and frees coder->threads.
+static void
+threads_end(struct lzma_stream_coder *coder, const lzma_allocator *allocator)
+{
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ coder->threads[i].state = THR_EXIT;
+ mythread_cond_signal(&coder->threads[i].cond); // Wake the worker if it is waiting in worker_decoder().
+ }
+ }
+
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i)
+ mythread_join(coder->threads[i].thread_id); // Each worker destroys its own mutex and cond before returning.
+
+ lzma_free(coder->threads, allocator);
+ coder->threads_initialized = 0;
+ coder->threads = NULL; // initialize_new_thread() reallocates the array when this is NULL.
+ coder->threads_free = NULL;
+
+ // The threads don't update these when they exit. Do it here.
+ coder->mem_in_use = 0;
+ coder->mem_cached = 0;
+
+ return;
+}
+
+
+static void // Asks every non-idle worker thread to stop its current work without exiting.
+threads_stop(struct lzma_stream_coder *coder)
+{
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ // The state must be changed conditionally because
+ // THR_IDLE -> THR_STOP is not a valid state change.
+ if (coder->threads[i].state != THR_IDLE) {
+ coder->threads[i].state = THR_STOP;
+ mythread_cond_signal(&coder->threads[i].cond); // The worker handles THR_STOP in worker_decoder().
+ }
+ }
+ }
+
+ return;
+}
+
+
+/// Initialize a new worker_thread structure and create a new thread. Sets coder->thr on success.
+static lzma_ret
+initialize_new_thread(struct lzma_stream_coder *coder,
+ const lzma_allocator *allocator)
+{
+ // Allocate the coder->threads array if needed. It's done here instead
+ // of when initializing the decoder because we don't need this if we
+ // use the direct mode (we may even free coder->threads in the middle
+ // of the file if we switch from threaded to direct mode).
+ if (coder->threads == NULL) {
+ coder->threads = lzma_alloc(
+ coder->threads_max * sizeof(struct worker_thread),
+ allocator);
+
+ if (coder->threads == NULL)
+ return LZMA_MEM_ERROR;
+ }
+
+ // Pick a free structure.
+ assert(coder->threads_initialized < coder->threads_max);
+ struct worker_thread *thr
+ = &coder->threads[coder->threads_initialized];
+
+ if (mythread_mutex_init(&thr->mutex))
+ goto error_mutex;
+
+ if (mythread_cond_init(&thr->cond))
+ goto error_cond;
+
+ thr->state = THR_IDLE; // The new thread waits in worker_decoder() while the state is THR_IDLE.
+ thr->in = NULL;
+ thr->in_size = 0;
+ thr->allocator = allocator;
+ thr->coder = coder;
+ thr->outbuf = NULL;
+ thr->block_decoder = LZMA_NEXT_CODER_INIT;
+ thr->mem_filters = 0;
+
+ if (mythread_create(&thr->thread_id, worker_decoder, thr))
+ goto error_thread;
+
+ ++coder->threads_initialized; // Counted only after the thread was created successfully.
+ coder->thr = thr;
+
+ return LZMA_OK;
+
+error_thread: // Undo in reverse order of initialization.
+ mythread_cond_destroy(&thr->cond);
+
+error_cond:
+ mythread_mutex_destroy(&thr->mutex);
+
+error_mutex:
+ return LZMA_MEM_ERROR; // Every mythread_* failure here is reported as LZMA_MEM_ERROR.
+}
+
+
+/// Makes coder->thr point to a worker thread that is ready to receive
+/// a new Block: a thread is taken from the coder->threads_free stack if
+/// one is available, otherwise a new thread is created. The per-Block
+/// position and progress fields of the thread are reset.
+///
+/// Returns LZMA_OK on success or the error from initialize_new_thread().
+static lzma_ret
+get_thread(struct lzma_stream_coder *coder, const lzma_allocator *allocator)
+{
+	// Prefer reusing a cached thread from the stack of free threads.
+	mythread_sync(coder->mutex) {
+		struct worker_thread *cached = coder->threads_free;
+
+		if (cached != NULL) {
+			coder->threads_free = cached->next;
+
+			// The thread leaves the cache so its filters no
+			// longer count as cached memory. It isn't added to
+			// mem_in_use here; the caller does that because it
+			// knows how much memory will actually be used
+			// (the filter chain might change).
+			coder->mem_cached -= cached->mem_filters;
+
+			coder->thr = cached;
+		}
+	}
+
+	// Nothing was cached: a new thread has to be started.
+	if (coder->thr == NULL) {
+		assert(coder->threads_initialized < coder->threads_max);
+
+		const lzma_ret ret = initialize_new_thread(coder, allocator);
+		if (ret != LZMA_OK)
+			return ret;
+	}
+
+	struct worker_thread *thr = coder->thr;
+
+	thr->partial_update = PARTIAL_DISABLED;
+
+	thr->progress_out = 0;
+	thr->progress_in = 0;
+
+	thr->out_pos = 0;
+	thr->in_pos = 0;
+	thr->in_filled = 0;
+
+	return LZMA_OK;
+}
+
+
+/// Copies decoded data from the output queue (coder->outq) to the
+/// application-provided output buffer and, if allowed, waits on
+/// coder->cond until more output is available or decoding of a new
+/// Block can be started. coder->mutex is held for the whole loop
+/// (the condition variable waits use it as their mutex).
+///
+/// \param      coder       Decoder state; coder->out_was_filled and
+///                         coder->pending_error may be set here.
+/// \param      allocator   Passed to lzma_outq_read().
+/// \param      out         Output buffer
+/// \param      out_pos     Position in out[]; advanced as data is copied
+/// \param      out_size    Size of out[]
+/// \param      input_is_possible
+///                         If not NULL, *input_is_possible is set to true
+///                         and the function returns as soon as there is
+///                         enough memory, a free output queue buffer, and
+///                         a free (or creatable) worker thread for the
+///                         next Block. This function never sets it to
+///                         false. If NULL, this check is skipped.
+/// \param      waiting_allowed
+///                         If false, the function returns after the first
+///                         non-blocking pass instead of waiting.
+/// \param      wait_abs    Absolute timeout used with coder->timeout. It is
+///                         set on the first wait (when *has_blocked is
+///                         false) and reused on later waits.
+/// \param      has_blocked Tells if wait_abs has already been set.
+///
+/// \return     LZMA_OK if no error was seen, LZMA_TIMED_OUT if the timed
+///             wait failed, an error code from lzma_outq_read(), or
+///             coder->thread_error when coder->fail_fast is set. With any
+///             return value other than LZMA_OK and LZMA_TIMED_OUT,
+///             threads_stop() has been called before returning.
+static lzma_ret
+read_output_and_wait(struct lzma_stream_coder *coder,
+ const lzma_allocator *allocator,
+ uint8_t *restrict out, size_t *restrict out_pos,
+ size_t out_size,
+ bool *input_is_possible,
+ bool waiting_allowed,
+ mythread_condtime *wait_abs, bool *has_blocked)
+{
+ lzma_ret ret = LZMA_OK;
+
+ mythread_sync(coder->mutex) {
+ do {
+ // Get as much output from the queue as is possible
+ // without blocking.
+ const size_t out_start = *out_pos;
+ do {
+ ret = lzma_outq_read(&coder->outq, allocator,
+ out, out_pos, out_size,
+ NULL, NULL);
+
+ // If a Block was finished, tell the worker
+ // thread of the next Block (if it is still
+ // running) to start telling the main thread
+ // when new output is available.
+ if (ret == LZMA_STREAM_END)
+ lzma_outq_enable_partial_output(
+ &coder->outq,
+ &worker_enable_partial_update);
+
+ // Loop until a Block wasn't finished.
+ // It's important to loop around even if
+ // *out_pos == out_size because there could
+ // be an empty Block that will return
+ // LZMA_STREAM_END without needing any
+ // output space.
+ } while (ret == LZMA_STREAM_END);
+
+ // Check if lzma_outq_read reported an error from
+ // the Block decoder.
+ if (ret != LZMA_OK)
+ break;
+
+ // If the output buffer is now full but it wasn't full
+ // when this function was called, set out_was_filled.
+ // This way the next call to stream_decode_mt() knows
+ // that some output was produced and no output space
+ // remained in the previous call to stream_decode_mt().
+ if (*out_pos == out_size && *out_pos != out_start)
+ coder->out_was_filled = true;
+
+ // Check if any thread has indicated an error.
+ if (coder->thread_error != LZMA_OK) {
+ // If LZMA_FAIL_FAST was used, report errors
+ // from worker threads immediately.
+ if (coder->fail_fast) {
+ ret = coder->thread_error;
+ break;
+ }
+
+ // Otherwise set pending_error. The value we
+ // set here will not actually get used other
+ // than working as a flag that an error has
+ // occurred. This is because in SEQ_ERROR
+ // all output before the error will be read
+ // first by calling this function, and once we
+ // reach the location of the (first) error the
+ // error code from the above lzma_outq_read()
+ // will be returned to the application.
+ //
+ // Use LZMA_PROG_ERROR since the value should
+ // never leak to the application. It's
+ // possible that pending_error has already
+ // been set but that doesn't matter: if we get
+ // here, pending_error only works as a flag.
+ coder->pending_error = LZMA_PROG_ERROR;
+ }
+
+ // Check if decoding of the next Block can be started.
+ // The memusage of the active threads must be low
+ // enough, there must be a free buffer slot in the
+ // output queue, and there must be a free thread
+ // (that can be either created or an existing one
+ // reused).
+ //
+ // NOTE: This is checked after reading the output
+ // above because reading the output can free a slot in
+ // the output queue and also reduce active memusage.
+ //
+ // NOTE: If output queue is empty, then input will
+ // always be possible.
+ if (input_is_possible != NULL
+ && coder->memlimit_threading
+ - coder->mem_in_use
+ - coder->outq.mem_in_use
+ >= coder->mem_next_block
+ && lzma_outq_has_buf(&coder->outq)
+ && (coder->threads_initialized
+ < coder->threads_max
+ || coder->threads_free
+ != NULL)) {
+ *input_is_possible = true;
+ break;
+ }
+
+ // If the caller doesn't want us to block, return now.
+ if (!waiting_allowed)
+ break;
+
+ // This check is needed only when input_is_possible
+ // is NULL. We must return if we aren't waiting for
+ // input to become possible and there is no more
+ // output coming from the queue.
+ if (lzma_outq_is_empty(&coder->outq)) {
+ assert(input_is_possible == NULL);
+ break;
+ }
+
+ // If there is more data available from the queue,
+ // our out buffer must be full and we need to return
+ // so that the application can provide more output
+ // space.
+ //
+ // NOTE: In general lzma_outq_is_readable() can return
+ // true also when there are no more bytes available.
+ // This can happen when a Block has finished without
+ // providing any new output. We know that this is not
+ // the case because in the beginning of this loop we
+ // tried to read as much as possible even when we had
+ // no output space left and the mutex has been locked
+ // all the time (so worker threads cannot have changed
+ // anything). Thus there must be actual pending output
+ // in the queue.
+ if (lzma_outq_is_readable(&coder->outq)) {
+ assert(*out_pos == out_size);
+ break;
+ }
+
+ // If the application stops providing more input
+ // in the middle of a Block, there will eventually
+ // be one worker thread left that is stuck waiting for
+ // more input (that might never arrive) and a matching
+ // outbuf which the worker thread cannot finish due
+ // to lack of input. We must detect this situation,
+ // otherwise we would end up waiting indefinitely
+ // (if no timeout is in use) or keep returning
+ // LZMA_TIMED_OUT while making no progress. Thus, the
+ // application would never get LZMA_BUF_ERROR from
+ // lzma_code() which would tell the application that
+ // no more progress is possible. No LZMA_BUF_ERROR
+ // means that, for example, truncated .xz files could
+ // cause an infinite loop.
+ //
+ // A worker thread doing partial updates will
+ // store not only the output position in outbuf->pos
+ // but also the matching input position in
+ // outbuf->decoder_in_pos. Here we check if that
+ // input position matches the amount of input that
+ // the worker thread has been given (in_filled).
+ // If so, we must return and not wait as no more
+ // output will be coming without first getting more
+ // input to the worker thread. If the application
+ // keeps calling lzma_code() without providing more
+ // input, it will eventually get LZMA_BUF_ERROR.
+ //
+ // NOTE: We can read partial_update and in_filled
+ // without thr->mutex as only the main thread
+ // modifies these variables. decoder_in_pos requires
+ // coder->mutex which we are already holding.
+ if (coder->thr != NULL && coder->thr->partial_update
+ != PARTIAL_DISABLED) {
+ // There is exactly one outbuf in the queue.
+ assert(coder->thr->outbuf == coder->outq.head);
+ assert(coder->thr->outbuf == coder->outq.tail);
+
+ if (coder->thr->outbuf->decoder_in_pos
+ == coder->thr->in_filled)
+ break;
+ }
+
+ // Wait for input or output to become possible.
+ if (coder->timeout != 0) {
+ // See the comment in stream_encoder_mt.c
+ // about why mythread_condtime_set() is used
+ // like this.
+ //
+ // FIXME?
+ // In contrast to the encoder, this calls
+ // _condtime_set while the mutex is locked.
+ if (!*has_blocked) {
+ *has_blocked = true;
+ mythread_condtime_set(wait_abs,
+ &coder->cond,
+ coder->timeout);
+ }
+
+ // A non-zero return value from the timed wait is
+ // reported to the caller as LZMA_TIMED_OUT.
+ if (mythread_cond_timedwait(&coder->cond,
+ &coder->mutex,
+ wait_abs) != 0) {
+ ret = LZMA_TIMED_OUT;
+ break;
+ }
+ } else {
+ mythread_cond_wait(&coder->cond,
+ &coder->mutex);
+ }
+ } while (ret == LZMA_OK);
+ }
+
+ // If we are returning an error, then the application cannot get
+ // more output from us and thus keeping the threads running is
+ // useless and waste of CPU time.
+ if (ret != LZMA_OK && ret != LZMA_TIMED_OUT)
+ threads_stop(coder);
+
+ return ret;
+}
+
+
+/// Collects a Block Header from the input into coder->buffer and decodes
+/// it into coder->block_options once all of its bytes are available.
+///
+/// Returns:
+///   - LZMA_OK: no input was available or the header is still incomplete.
+///   - LZMA_INDEX_DETECTED: the first byte is the Index Indicator; the byte
+///     is not consumed.
+///   - LZMA_STREAM_END: the header was decoded and coder->block_options
+///     is ready.
+///   - Any other value: error code from lzma_block_header_decode().
+static lzma_ret
+decode_block_header(struct lzma_stream_coder *coder,
+		const lzma_allocator *allocator, const uint8_t *restrict in,
+		size_t *restrict in_pos, size_t in_size)
+{
+	// Nothing can be done without input.
+	if (*in_pos >= in_size)
+		return LZMA_OK;
+
+	if (coder->pos == 0) {
+		// At the first byte: it either marks the beginning of
+		// the Index or encodes the size of the Block Header.
+		const uint8_t first_byte = in[*in_pos];
+
+		if (first_byte == INDEX_INDICATOR)
+			return LZMA_INDEX_DETECTED;
+
+		// *in_pos is intentionally not advanced because the Block
+		// Header decoder wants to see this byte too.
+		coder->block_options.header_size
+				= lzma_block_header_size_decode(first_byte);
+	}
+
+	// Gather the header bytes into the internal buffer.
+	lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+			coder->block_options.header_size);
+
+	// More input is needed before the header can be decoded.
+	if (coder->pos < coder->block_options.header_size)
+		return LZMA_OK;
+
+	coder->pos = 0;
+
+	// The .ignore_check option requires version 1.
+	coder->block_options.version = 1;
+
+	// Every member of the filters array gets initialized by the
+	// Block Header decoder so that isn't done here.
+	coder->block_options.filters = coder->filters;
+
+	const lzma_ret ret = lzma_block_header_decode(
+			&coder->block_options, allocator, coder->buffer);
+	if (ret != LZMA_OK)
+		return ret;
+
+	// lzma_block_header_decode() always resets ignore_check to false,
+	// so the LZMA_IGNORE_CHECK setting has to be applied afterwards.
+	coder->block_options.ignore_check = coder->ignore_check;
+
+	return LZMA_STREAM_END;
+}
+
+
+/// Returns the combined size of the Compressed Data, Block Padding, and
+/// Check fields of the current Block, based on coder->block_options and
+/// coder->stream_flags.
+static size_t
+comp_blk_size(const struct lzma_stream_coder *coder)
+{
+	// Compressed Data plus Block Padding: compressed_size passed
+	// through vli_ceil4().
+	const lzma_vli padded_size
+			= vli_ceil4(coder->block_options.compressed_size);
+
+	return padded_size + lzma_check_size(coder->stream_flags.check);
+}
+
+
+/// Tells if a Block with the given size (compressed or uncompressed) has
+/// to be decoded without threading. That is the case when the size is
+/// unknown or when it is too big compared to SIZE_MAX; big sizes are
+/// rejected to avoid integer overflows and truncations when lzma_vli
+/// is assigned to a size_t.
+static bool
+is_direct_mode_needed(lzma_vli size)
+{
+	if (size == LZMA_VLI_UNKNOWN)
+		return true;
+
+	return size > SIZE_MAX / 3;
+}
+
+
+/// Prepares the coder for decoding a Stream from its beginning: the Index
+/// hash is (re)initialized and the state machine is moved back to
+/// SEQ_STREAM_HEADER with an empty internal buffer position.
+///
+/// Returns LZMA_MEM_ERROR if lzma_index_hash_init() returns NULL,
+/// otherwise LZMA_OK.
+static lzma_ret
+stream_decoder_reset(struct lzma_stream_coder *coder,
+		const lzma_allocator *allocator)
+{
+	// The Index hash is what the Index gets verified against.
+	coder->index_hash = lzma_index_hash_init(coder->index_hash, allocator);
+
+	if (coder->index_hash != NULL) {
+		// Start over from the Stream Header.
+		coder->pos = 0;
+		coder->sequence = SEQ_STREAM_HEADER;
+		return LZMA_OK;
+	}
+
+	return LZMA_MEM_ERROR;
+}
+
+
+static lzma_ret
+stream_decode_mt(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size,
+ uint8_t *restrict out, size_t *restrict out_pos,
+ size_t out_size, lzma_action action)
+{
+ struct lzma_stream_coder *coder = coder_ptr;
+
+ mythread_condtime wait_abs;
+ bool has_blocked = false;
+
+ // Determine if in SEQ_BLOCK_HEADER and SEQ_BLOCK_THR_RUN we should
+ // tell read_output_and_wait() to wait until it can fill the output
+ // buffer (or a timeout occurs). Two conditions must be met:
+ //
+ // (1) If the caller provided no new input. The reason for this
+ // can be, for example, the end of the file or that there is
+ // a pause in the input stream and more input is available
+ // a little later. In this situation we should wait for output
+ // because otherwise we would end up in a busy-waiting loop where
+ // we make no progress and the application just calls us again
+ // without providing any new input. This would then result in
+ // LZMA_BUF_ERROR even though more output would be available
+ // once the worker threads decode more data.
+ //
+ // (2) Even if (1) is true, we will not wait if the previous call to
+ // this function managed to produce some output and the output
+ // buffer became full. This is for compatibility with applications
+ // that call lzma_code() in such a way that new input is provided
+ // only when the output buffer didn't become full. Without this
+ // trick such applications would have bad performance (bad
+ // parallelization due to decoder not getting input fast enough).
+ //
+ // NOTE: Such loops might require that timeout is disabled (0)
+ // if they assume that output-not-full implies that all input has
+ // been consumed. If and only if timeout is enabled, we may return
+ // when output isn't full *and* not all input has been consumed.
+ //
+ // However, if LZMA_FINISH is used, the above is ignored and we always
+ // wait (timeout can still cause us to return) because we know that
+ // we won't get any more input. This matters if the input file is
+ // truncated and we are doing single-shot decoding, that is,
+ // timeout = 0 and LZMA_FINISH is used on the first call to
+ // lzma_code() and the output buffer is known to be big enough
+ // to hold all uncompressed data:
+ //
+ // - If LZMA_FINISH wasn't handled specially, we could return
+ // LZMA_OK before providing all output that is possible with the
+ // truncated input. The rest would be available if lzma_code() was
+ // called again but then it's not single-shot decoding anymore.
+ //
+ // - By handling LZMA_FINISH specially here, the first call will
+ // produce all the output, matching the behavior of the
+ // single-threaded decoder.
+ //
+ // So it's a very specific corner case but also easy to avoid. Note
+ // that this special handling of LZMA_FINISH has no effect for
+ // single-shot decoding when the input file is valid (not truncated);
+ // premature LZMA_OK wouldn't be possible as long as timeout = 0.
+ const bool waiting_allowed = action == LZMA_FINISH
+ || (*in_pos == in_size && !coder->out_was_filled);
+ coder->out_was_filled = false;
+
+ while (true)
+ switch (coder->sequence) {
+ case SEQ_STREAM_HEADER: {
+ // Copy the Stream Header to the internal buffer.
+ const size_t in_old = *in_pos;
+ lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+ LZMA_STREAM_HEADER_SIZE);
+ coder->progress_in += *in_pos - in_old;
+
+ // Return if we didn't get the whole Stream Header yet.
+ if (coder->pos < LZMA_STREAM_HEADER_SIZE)
+ return LZMA_OK;
+
+ coder->pos = 0;
+
+ // Decode the Stream Header.
+ const lzma_ret ret = lzma_stream_header_decode(
+ &coder->stream_flags, coder->buffer);
+ if (ret != LZMA_OK)
+ return ret == LZMA_FORMAT_ERROR && !coder->first_stream
+ ? LZMA_DATA_ERROR : ret;
+
+ // If we are decoding concatenated Streams, and the later
+ // Streams have invalid Header Magic Bytes, we give
+ // LZMA_DATA_ERROR instead of LZMA_FORMAT_ERROR.
+ coder->first_stream = false;
+
+ // Copy the type of the Check so that Block Header and Block
+ // decoders see it.
+ coder->block_options.check = coder->stream_flags.check;
+
+ // Even if we return LZMA_*_CHECK below, we want
+ // to continue from Block Header decoding.
+ coder->sequence = SEQ_BLOCK_HEADER;
+
+ // Detect if there's no integrity check or if it is
+ // unsupported if those were requested by the application.
+ if (coder->tell_no_check && coder->stream_flags.check
+ == LZMA_CHECK_NONE)
+ return LZMA_NO_CHECK;
+
+ if (coder->tell_unsupported_check
+ && !lzma_check_is_supported(
+ coder->stream_flags.check))
+ return LZMA_UNSUPPORTED_CHECK;
+
+ if (coder->tell_any_check)
+ return LZMA_GET_CHECK;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_HEADER: {
+ const size_t in_old = *in_pos;
+ const lzma_ret ret = decode_block_header(coder, allocator,
+ in, in_pos, in_size);
+ coder->progress_in += *in_pos - in_old;
+
+ if (ret == LZMA_OK) {
+ // We didn't decode the whole Block Header yet.
+ //
+ // Read output from the queue before returning. This
+ // is important because it is possible that the
+ // application doesn't have any new input available
+ // immediately. If we didn't try to copy output from
+ // the output queue here, lzma_code() could end up
+ // returning LZMA_BUF_ERROR even though queued output
+ // is available.
+ //
+ // If the lzma_code() call provided at least one input
+ // byte, only copy as much data from the output queue
+ // as is available immediately. This way the
+ // application will be able to provide more input
+ // without a delay.
+ //
+ // On the other hand, if lzma_code() was called with
+ // an empty input buffer(*), treat it specially: try
+ // to fill the output buffer even if it requires
+ // waiting for the worker threads to provide output
+ // (timeout, if specified, can still cause us to
+ // return).
+ //
+ // - This way the application will be able to get all
+ // data that can be decoded from the input provided
+ // so far.
+ //
+ // - We avoid both premature LZMA_BUF_ERROR and
+ // busy-waiting where the application repeatedly
+ // calls lzma_code() which immediately returns
+ // LZMA_OK without providing new data.
+ //
+ // - If the queue becomes empty, we won't wait
+ // anything and will return LZMA_OK immediately
+ // (coder->timeout is completely ignored).
+ //
+ // (*) See the comment at the beginning of this
+ // function how waiting_allowed is determined
+ // and why there is an exception to the rule
+ // of "called with an empty input buffer".
+ assert(*in_pos == in_size);
+
+ // If LZMA_FINISH was used we know that we won't get
+ // more input, so the file must be truncated if we
+ // get here. If worker threads don't detect any
+ // errors, eventually there will be no more output
+ // while we keep returning LZMA_OK which gets
+ // converted to LZMA_BUF_ERROR in lzma_code().
+ //
+ // If fail-fast is enabled then we will return
+ // immediately using LZMA_DATA_ERROR instead of
+ // LZMA_OK or LZMA_BUF_ERROR. Rationale for the
+ // error code:
+ //
+ // - Worker threads may have a large amount of
+ // not-yet-decoded input data and we don't
+ // know for sure if all data is valid. Bad
+ // data there would result in LZMA_DATA_ERROR
+ // when fail-fast isn't used.
+ //
+ // - Immediate LZMA_BUF_ERROR would be a bit weird
+ // considering the older liblzma code. lzma_code()
+ // even has an assertion to prevent coders from
+ // returning LZMA_BUF_ERROR directly.
+ //
+ // The downside of this is that with fail-fast apps
+ // cannot always distinguish between corrupt and
+ // truncated files.
+ if (action == LZMA_FINISH && coder->fail_fast) {
+ // We won't produce any more output. Stop
+ // the unfinished worker threads so they
+ // won't waste CPU time.
+ threads_stop(coder);
+ return LZMA_DATA_ERROR;
+ }
+
+ // read_output_and_wait() will call threads_stop()
+ // if needed so with that we can use return_if_error.
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ NULL, waiting_allowed,
+ &wait_abs, &has_blocked));
+
+ if (coder->pending_error != LZMA_OK) {
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ return LZMA_OK;
+ }
+
+ if (ret == LZMA_INDEX_DETECTED) {
+ coder->sequence = SEQ_INDEX_WAIT_OUTPUT;
+ break;
+ }
+
+ // See if an error occurred.
+ if (ret != LZMA_STREAM_END) {
+ // NOTE: Here and in all other places where
+ // pending_error is set, it may overwrite the value
+ // (LZMA_PROG_ERROR) set by read_output_and_wait().
+ // That function might overwrite value set here too.
+ // These are fine because when read_output_and_wait()
+ // sets pending_error, it actually works as a flag
+ // variable only ("some error has occurred") and the
+ // actual value of pending_error is not used in
+ // SEQ_ERROR. In such cases SEQ_ERROR will eventually
+ // get the correct error code from the return value of
+ // a later read_output_and_wait() call.
+ coder->pending_error = ret;
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ // Calculate the memory usage of the filters / Block decoder.
+ coder->mem_next_filters = lzma_raw_decoder_memusage(
+ coder->filters);
+
+ if (coder->mem_next_filters == UINT64_MAX) {
+ // One or more unknown Filter IDs.
+ coder->pending_error = LZMA_OPTIONS_ERROR;
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ coder->sequence = SEQ_BLOCK_INIT;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_INIT: {
+ // Check if decoding is possible at all with the current
+ // memlimit_stop which we must never exceed.
+ //
+ // This needs to be the first thing in SEQ_BLOCK_INIT
+ // to make it possible to restart decoding after increasing
+ // memlimit_stop with lzma_memlimit_set().
+ if (coder->mem_next_filters > coder->memlimit_stop) {
+ // Flush pending output before returning
+ // LZMA_MEMLIMIT_ERROR. If the application doesn't
+ // want to increase the limit, at least it will get
+ // all the output possible so far.
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ NULL, true, &wait_abs, &has_blocked));
+
+ if (!lzma_outq_is_empty(&coder->outq))
+ return LZMA_OK;
+
+ return LZMA_MEMLIMIT_ERROR;
+ }
+
+ // Check if the size information is available in Block Header.
+ // If it is, check if the sizes are small enough that we don't
+ // need to worry *too* much about integer overflows later in
+ // the code. If these conditions are not met, we must use the
+ // single-threaded direct mode.
+ if (is_direct_mode_needed(coder->block_options.compressed_size)
+ || is_direct_mode_needed(
+ coder->block_options.uncompressed_size)) {
+ coder->sequence = SEQ_BLOCK_DIRECT_INIT;
+ break;
+ }
+
+ // Calculate the amount of memory needed for the input and
+ // output buffers in threaded mode.
+ //
+ // These cannot overflow because we already checked that
+ // the sizes are small enough using is_direct_mode_needed().
+ coder->mem_next_in = comp_blk_size(coder);
+ const uint64_t mem_buffers = coder->mem_next_in
+ + lzma_outq_outbuf_memusage(
+ coder->block_options.uncompressed_size);
+
+ // Add the amount needed by the filters.
+ // Avoid integer overflows.
+ if (UINT64_MAX - mem_buffers < coder->mem_next_filters) {
+ // Use direct mode if the memusage would overflow.
+ // This is a theoretical case that shouldn't happen
+ // in practice unless the input file is weird (broken
+ // or malicious).
+ coder->sequence = SEQ_BLOCK_DIRECT_INIT;
+ break;
+ }
+
+ // Amount of memory needed to decode this Block in
+ // threaded mode:
+ coder->mem_next_block = coder->mem_next_filters + mem_buffers;
+
+ // If this alone would exceed memlimit_threading, then we must
+ // use the single-threaded direct mode.
+ if (coder->mem_next_block > coder->memlimit_threading) {
+ coder->sequence = SEQ_BLOCK_DIRECT_INIT;
+ break;
+ }
+
+ // Use the threaded mode. Free the direct mode decoder in
+ // case it has been initialized.
+ lzma_next_end(&coder->block_decoder, allocator);
+ coder->mem_direct_mode = 0;
+
+ // Since we already know what the sizes are supposed to be,
+ // we can already add them to the Index hash. The Block
+ // decoder will verify the values while decoding.
+ const lzma_ret ret = lzma_index_hash_append(coder->index_hash,
+ lzma_block_unpadded_size(
+ &coder->block_options),
+ coder->block_options.uncompressed_size);
+ if (ret != LZMA_OK) {
+ coder->pending_error = ret;
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ coder->sequence = SEQ_BLOCK_THR_INIT;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_THR_INIT: {
+ // We need to wait for multiple conditions to become true
+ // until we can initialize the Block decoder and let a worker
+ // thread decode it:
+ //
+ // - Wait for the memory usage of the active threads to drop
+ // so that starting the decoding of this Block won't make
+ // us go over memlimit_threading.
+ //
+ // - Wait for at least one free output queue slot.
+ //
+ // - Wait for a free worker thread.
+ //
+ // While we wait, we must copy decompressed data to the out
+ // buffer and catch possible decoder errors.
+ //
+ // read_output_and_wait() does all the above.
+ bool block_can_start = false;
+
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ &block_can_start, true,
+ &wait_abs, &has_blocked));
+
+ if (coder->pending_error != LZMA_OK) {
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ if (!block_can_start) {
+ // It's not a timeout because return_if_error handles
+ // it already. Output queue cannot be empty either
+ // because in that case block_can_start would have
+ // been true. Thus the output buffer must be full and
+ // the queue isn't empty.
+ assert(*out_pos == out_size);
+ assert(!lzma_outq_is_empty(&coder->outq));
+ return LZMA_OK;
+ }
+
+ // We know that we can start decoding this Block without
+ // exceeding memlimit_threading. However, to stay below
+ // memlimit_threading may require freeing some of the
+ // cached memory.
+ //
+ // Get a local copy of variables that require locking the
+ // mutex. It is fine if the worker threads modify the real
+ // values after we read these as those changes can only be
+ // towards more favorable conditions (less memory in use,
+ // more in cache).
+ //
+ // These are initialized to silence warnings.
+ uint64_t mem_in_use = 0;
+ uint64_t mem_cached = 0;
+ struct worker_thread *thr = NULL;
+
+ mythread_sync(coder->mutex) {
+ mem_in_use = coder->mem_in_use;
+ mem_cached = coder->mem_cached;
+ thr = coder->threads_free;
+ }
+
+ // The maximum amount of memory that can be held by other
+ // threads and cached buffers while allowing us to start
+ // decoding the next Block.
+ const uint64_t mem_max = coder->memlimit_threading
+ - coder->mem_next_block;
+
+ // If the existing allocations are so large that starting
+ // to decode this Block might exceed memlimit_threading,
+ // try to free memory from the output queue cache first.
+ //
+ // NOTE: This math assumes the worst case. It's possible
+ // that the limit wouldn't be exceeded if the existing cached
+ // allocations are reused.
+ if (mem_in_use + mem_cached + coder->outq.mem_allocated
+ > mem_max) {
+ // Clear the outq cache except leave one buffer in
+ // the cache if its size is correct. That way we
+ // don't free and almost immediately reallocate
+ // an identical buffer.
+ lzma_outq_clear_cache2(&coder->outq, allocator,
+ coder->block_options.uncompressed_size);
+ }
+
+ // If there is at least one worker_thread in the cache and
+ // the existing allocations are so large that starting to
+ // decode this Block might exceed memlimit_threading, free
+ // memory by freeing cached Block decoders.
+ //
+ // NOTE: The comparison is different here than above.
+ // Here we don't care about cached buffers in outq anymore
+ // and only look at memory actually in use. This is because
+ // if there is something in outq cache, it's a single buffer
+ // that can be used as is. We ensured this in the above
+ // if-block.
+ uint64_t mem_freed = 0;
+ if (thr != NULL && mem_in_use + mem_cached
+ + coder->outq.mem_in_use > mem_max) {
+ // Don't free the first Block decoder if its memory
+ // usage isn't greater than what this Block will need.
+ // Typically the same filter chain is used for all
+ // Blocks so this way the allocations can be reused
+ // when get_thread() picks the first worker_thread
+ // from the cache.
+ if (thr->mem_filters <= coder->mem_next_filters)
+ thr = thr->next;
+
+ while (thr != NULL) {
+ lzma_next_end(&thr->block_decoder, allocator);
+ mem_freed += thr->mem_filters;
+ thr->mem_filters = 0;
+ thr = thr->next;
+ }
+ }
+
+ // Update the memory usage counters. Note that coder->mem_*
+ // may have changed since we read them so we must subtract
+ // or add the changes.
+ mythread_sync(coder->mutex) {
+ coder->mem_cached -= mem_freed;
+
+ // Memory needed for the filters and the input buffer.
+ // The output queue takes care of its own counter so
+ // we don't touch it here.
+ //
+ // NOTE: After this, coder->mem_in_use +
+ // coder->mem_cached might count the same thing twice.
+ // If so, this will get corrected in get_thread() when
+ // a worker_thread is picked from coder->threads_free
+ // and its memory usage is subtracted from mem_cached.
+ coder->mem_in_use += coder->mem_next_in
+ + coder->mem_next_filters;
+ }
+
+ // Allocate memory for the output buffer in the output queue.
+ lzma_ret ret = lzma_outq_prealloc_buf(
+ &coder->outq, allocator,
+ coder->block_options.uncompressed_size);
+ if (ret != LZMA_OK) {
+ threads_stop(coder);
+ return ret;
+ }
+
+ // Set up coder->thr.
+ ret = get_thread(coder, allocator);
+ if (ret != LZMA_OK) {
+ threads_stop(coder);
+ return ret;
+ }
+
+ // The new Block decoder memory usage is already counted in
+ // coder->mem_in_use. Store it in the thread too.
+ coder->thr->mem_filters = coder->mem_next_filters;
+
+ // Initialize the Block decoder.
+ coder->thr->block_options = coder->block_options;
+ ret = lzma_block_decoder_init(
+ &coder->thr->block_decoder, allocator,
+ &coder->thr->block_options);
+
+ // Free the allocated filter options since they are needed
+ // only to initialize the Block decoder.
+ lzma_filters_free(coder->filters, allocator);
+ coder->thr->block_options.filters = NULL;
+
+ // Check if memory usage calculation and Block decoder
+ // initialization succeeded.
+ if (ret != LZMA_OK) {
+ coder->pending_error = ret;
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ // Allocate the input buffer.
+ coder->thr->in_size = coder->mem_next_in;
+ coder->thr->in = lzma_alloc(coder->thr->in_size, allocator);
+ if (coder->thr->in == NULL) {
+ threads_stop(coder);
+ return LZMA_MEM_ERROR;
+ }
+
+ // Get the preallocated output buffer.
+ coder->thr->outbuf = lzma_outq_get_buf(
+ &coder->outq, coder->thr);
+
+ // Start the decoder.
+ mythread_sync(coder->thr->mutex) {
+ assert(coder->thr->state == THR_IDLE);
+ coder->thr->state = THR_RUN;
+ mythread_cond_signal(&coder->thr->cond);
+ }
+
+ // Enable output from the thread that holds the oldest output
+ // buffer in the output queue (if such a thread exists).
+ mythread_sync(coder->mutex) {
+ lzma_outq_enable_partial_output(&coder->outq,
+ &worker_enable_partial_update);
+ }
+
+ coder->sequence = SEQ_BLOCK_THR_RUN;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_THR_RUN: {
+ if (action == LZMA_FINISH && coder->fail_fast) {
+ // We know that we won't get more input and that
+ // the caller wants fail-fast behavior. If we see
+ // that we don't have enough input to finish this
+ // Block, return LZMA_DATA_ERROR immediately.
+ // See SEQ_BLOCK_HEADER for the error code rationale.
+ const size_t in_avail = in_size - *in_pos;
+ const size_t in_needed = coder->thr->in_size
+ - coder->thr->in_filled;
+ if (in_avail < in_needed) {
+ threads_stop(coder);
+ return LZMA_DATA_ERROR;
+ }
+ }
+
+ // Copy input to the worker thread.
+ size_t cur_in_filled = coder->thr->in_filled;
+ lzma_bufcpy(in, in_pos, in_size, coder->thr->in,
+ &cur_in_filled, coder->thr->in_size);
+
+ // Tell the thread how much we copied.
+ mythread_sync(coder->thr->mutex) {
+ coder->thr->in_filled = cur_in_filled;
+
+ // NOTE: Most of the time we are copying input faster
+ // than the thread can decode so most of the time
+ // calling mythread_cond_signal() is useless but
+ // we cannot make it conditional because thr->in_pos
+ // is updated without a mutex. And the overhead should
+ // be very much negligible anyway.
+ mythread_cond_signal(&coder->thr->cond);
+ }
+
+ // Read output from the output queue. Just like in
+ // SEQ_BLOCK_HEADER, we wait to fill the output buffer
+ // only if waiting_allowed was set to true in the beginning
+ // of this function (see the comment there).
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ NULL, waiting_allowed,
+ &wait_abs, &has_blocked));
+
+ if (coder->pending_error != LZMA_OK) {
+ coder->sequence = SEQ_ERROR;
+ break;
+ }
+
+ // Return if the input didn't contain the whole Block.
+ if (coder->thr->in_filled < coder->thr->in_size) {
+ assert(*in_pos == in_size);
+ return LZMA_OK;
+ }
+
+ // The whole Block has been copied to the thread-specific
+ // buffer. Continue from the next Block Header or Index.
+ coder->thr = NULL;
+ coder->sequence = SEQ_BLOCK_HEADER;
+ break;
+ }
+
+ case SEQ_BLOCK_DIRECT_INIT: {
+ // Wait for the threads to finish and that all decoded data
+ // has been copied to the output. That is, wait until the
+ // output queue becomes empty.
+ //
+ // NOTE: No need to check for coder->pending_error as
+ // we aren't consuming any input until the queue is empty
+ // and if there is a pending error, read_output_and_wait()
+ // will eventually return it before the queue is empty.
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ NULL, true, &wait_abs, &has_blocked));
+ if (!lzma_outq_is_empty(&coder->outq))
+ return LZMA_OK;
+
+ // Free the cached output buffers.
+ lzma_outq_clear_cache(&coder->outq, allocator);
+
+ // Get rid of the worker threads, including the coder->threads
+ // array.
+ threads_end(coder, allocator);
+
+ // Initialize the Block decoder.
+ const lzma_ret ret = lzma_block_decoder_init(
+ &coder->block_decoder, allocator,
+ &coder->block_options);
+
+ // Free the allocated filter options since they are needed
+ // only to initialize the Block decoder.
+ lzma_filters_free(coder->filters, allocator);
+ coder->block_options.filters = NULL;
+
+ // Check if Block decoder initialization succeeded.
+ if (ret != LZMA_OK)
+ return ret;
+
+ // Make the memory usage visible to _memconfig().
+ coder->mem_direct_mode = coder->mem_next_filters;
+
+ coder->sequence = SEQ_BLOCK_DIRECT_RUN;
+ }
+
+ // Fall through
+
+ case SEQ_BLOCK_DIRECT_RUN: {
+ const size_t in_old = *in_pos;
+ const size_t out_old = *out_pos;
+ const lzma_ret ret = coder->block_decoder.code(
+ coder->block_decoder.coder, allocator,
+ in, in_pos, in_size, out, out_pos, out_size,
+ action);
+ coder->progress_in += *in_pos - in_old;
+ coder->progress_out += *out_pos - out_old;
+
+ if (ret != LZMA_STREAM_END)
+ return ret;
+
+ // Block decoded successfully. Add the new size pair to
+ // the Index hash.
+ return_if_error(lzma_index_hash_append(coder->index_hash,
+ lzma_block_unpadded_size(
+ &coder->block_options),
+ coder->block_options.uncompressed_size));
+
+ coder->sequence = SEQ_BLOCK_HEADER;
+ break;
+ }
+
+ case SEQ_INDEX_WAIT_OUTPUT:
+ // Flush the output from all worker threads so that we can
+ // decode the Index without thinking about threading.
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ NULL, true, &wait_abs, &has_blocked));
+
+ if (!lzma_outq_is_empty(&coder->outq))
+ return LZMA_OK;
+
+ coder->sequence = SEQ_INDEX_DECODE;
+
+ // Fall through
+
+ case SEQ_INDEX_DECODE: {
+ // If we don't have any input, don't call
+ // lzma_index_hash_decode() since it would return
+ // LZMA_BUF_ERROR, which we must not do here.
+ if (*in_pos >= in_size)
+ return LZMA_OK;
+
+ // Decode the Index and compare it to the hash calculated
+ // from the sizes of the Blocks (if any).
+ const size_t in_old = *in_pos;
+ const lzma_ret ret = lzma_index_hash_decode(coder->index_hash,
+ in, in_pos, in_size);
+ coder->progress_in += *in_pos - in_old;
+ if (ret != LZMA_STREAM_END)
+ return ret;
+
+ coder->sequence = SEQ_STREAM_FOOTER;
+ }
+
+ // Fall through
+
+ case SEQ_STREAM_FOOTER: {
+ // Copy the Stream Footer to the internal buffer.
+ const size_t in_old = *in_pos;
+ lzma_bufcpy(in, in_pos, in_size, coder->buffer, &coder->pos,
+ LZMA_STREAM_HEADER_SIZE);
+ coder->progress_in += *in_pos - in_old;
+
+ // Return if we didn't get the whole Stream Footer yet.
+ if (coder->pos < LZMA_STREAM_HEADER_SIZE)
+ return LZMA_OK;
+
+ coder->pos = 0;
+
+ // Decode the Stream Footer. The decoder gives
+ // LZMA_FORMAT_ERROR if the magic bytes don't match,
+ // so convert that return code to LZMA_DATA_ERROR.
+ lzma_stream_flags footer_flags;
+ const lzma_ret ret = lzma_stream_footer_decode(
+ &footer_flags, coder->buffer);
+ if (ret != LZMA_OK)
+ return ret == LZMA_FORMAT_ERROR
+ ? LZMA_DATA_ERROR : ret;
+
+ // Check that Index Size stored in the Stream Footer matches
+ // the real size of the Index field.
+ if (lzma_index_hash_size(coder->index_hash)
+ != footer_flags.backward_size)
+ return LZMA_DATA_ERROR;
+
+ // Compare that the Stream Flags fields are identical in
+ // both Stream Header and Stream Footer.
+ return_if_error(lzma_stream_flags_compare(
+ &coder->stream_flags, &footer_flags));
+
+ if (!coder->concatenated)
+ return LZMA_STREAM_END;
+
+ coder->sequence = SEQ_STREAM_PADDING;
+ }
+
+ // Fall through
+
+ case SEQ_STREAM_PADDING:
+ assert(coder->concatenated);
+
+ // Skip over possible Stream Padding.
+ while (true) {
+ if (*in_pos >= in_size) {
+ // Unless LZMA_FINISH was used, we cannot
+ // know if there's more input coming later.
+ if (action != LZMA_FINISH)
+ return LZMA_OK;
+
+ // Stream Padding must be a multiple of
+ // four bytes.
+ return coder->pos == 0
+ ? LZMA_STREAM_END
+ : LZMA_DATA_ERROR;
+ }
+
+ // If the byte is not zero, it probably indicates
+ // beginning of a new Stream (or the file is corrupt).
+ if (in[*in_pos] != 0x00)
+ break;
+
+ ++*in_pos;
+ ++coder->progress_in;
+ coder->pos = (coder->pos + 1) & 3;
+ }
+
+ // Stream Padding must be a multiple of four bytes (empty
+ // Stream Padding is OK).
+ if (coder->pos != 0) {
+ ++*in_pos;
+ ++coder->progress_in;
+ return LZMA_DATA_ERROR;
+ }
+
+ // Prepare to decode the next Stream.
+ return_if_error(stream_decoder_reset(coder, allocator));
+ break;
+
+ case SEQ_ERROR:
+ if (!coder->fail_fast) {
+ // Let the application get all data before the point
+ // where the error was detected. This matches the
+ // behavior of single-threaded use.
+ //
+ // FIXME? Some errors (LZMA_MEM_ERROR) don't get here,
+ // they are returned immediately. Thus in rare cases
+ // the output will be less than in the single-threaded
+ // mode. Maybe this doesn't matter much in practice.
+ return_if_error(read_output_and_wait(coder, allocator,
+ out, out_pos, out_size,
+ NULL, true, &wait_abs, &has_blocked));
+
+ // We get here only if the error happened in the main
+ // thread, for example, unsupported Block Header.
+ if (!lzma_outq_is_empty(&coder->outq))
+ return LZMA_OK;
+ }
+
+ // We only get here if no errors were detected by the worker
+ // threads. Errors from worker threads would have already been
+ // returned by the call to read_output_and_wait() above.
+ return coder->pending_error;
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ // Never reached
+}
+
+
+/// \brief      Release everything owned by the multithreaded Stream decoder
+///
+/// \param      coder_ptr   The struct lzma_stream_coder to destroy
+/// \param      allocator   Allocator handed to every end/free call
+static void
+stream_decoder_mt_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+	struct lzma_stream_coder *const self = coder_ptr;
+
+	// Worker threads first, then the output queue.
+	threads_end(self, allocator);
+	lzma_outq_end(&self->outq, allocator);
+
+	// The direct-mode Block decoder, any filter options that are
+	// still allocated, and the Index hash.
+	lzma_next_end(&self->block_decoder, allocator);
+	lzma_filters_free(self->filters, allocator);
+	lzma_index_hash_end(self->index_hash, allocator);
+
+	// Finally the coder structure itself.
+	lzma_free(self, allocator);
+}
+
+
+/// \brief      Get the Check ID stored in the coder's Stream Flags
+///
+/// \param      coder_ptr   The struct lzma_stream_coder (not modified)
+///
+/// \return     coder->stream_flags.check
+static lzma_check
+stream_decoder_mt_get_check(const void *coder_ptr)
+{
+	return ((const struct lzma_stream_coder *)coder_ptr)
+			->stream_flags.check;
+}
+
+
+/// \brief      Report memory usage and optionally change memlimit_stop
+///
+/// Only memlimit_stop is read and written here. For now,
+/// memlimit_threading cannot be modified after initialization.
+///
+/// The reported usage includes cached memory. Leaving it out would be
+/// misleading and wouldn't tell the application how much memory is really
+/// needed to decompress the file, because the more threads and the higher
+/// the memlimits, the more memory the decoder may use.
+///
+/// A new limit is compared against that same usage (cache included), so
+/// limits that are too low are rejected. Freeing the cache right away to
+/// make a lower limit fit would be an alternative, but this is the simplest
+/// approach: lowering the limit in the middle of a file is unlikely, and
+/// the typical (and still uncommon) use is raising it after
+/// LZMA_MEMLIMIT_ERROR.
+///
+/// \param      coder_ptr       The struct lzma_stream_coder
+/// \param      memusage        Set to the current usage, never less than
+///                             LZMA_MEMUSAGE_BASE
+/// \param      old_memlimit    Set to memlimit_stop as it was on entry
+/// \param      new_memlimit    New memlimit_stop; 0 leaves it unchanged
+///
+/// \return     LZMA_MEMLIMIT_ERROR if new_memlimit is non-zero and smaller
+///             than *memusage (memlimit_stop is then not changed),
+///             otherwise LZMA_OK.
+static lzma_ret
+stream_decoder_mt_memconfig(void *coder_ptr, uint64_t *memusage,
+		uint64_t *old_memlimit, uint64_t new_memlimit)
+{
+	struct lzma_stream_coder *const self = coder_ptr;
+	uint64_t usage = 0;
+
+	// Take a snapshot of the counters while holding the coder mutex.
+	mythread_sync(self->mutex) {
+		usage = self->mem_direct_mode
+				+ self->mem_in_use
+				+ self->mem_cached
+				+ self->outq.mem_allocated;
+	}
+
+	// With no filter chains allocated the sum may be zero, but the
+	// caller must always see at least LZMA_MEMUSAGE_BASE.
+	*memusage = usage < LZMA_MEMUSAGE_BASE ? LZMA_MEMUSAGE_BASE : usage;
+	*old_memlimit = self->memlimit_stop;
+
+	// Zero means that the caller only wanted to query the values.
+	if (new_memlimit == 0)
+		return LZMA_OK;
+
+	if (new_memlimit < *memusage)
+		return LZMA_MEMLIMIT_ERROR;
+
+	self->memlimit_stop = new_memlimit;
+	return LZMA_OK;
+}
+
+
+/// \brief      Sum up progress of the main coder and every worker thread
+///
+/// \param      coder_ptr       The struct lzma_stream_coder
+/// \param      progress_in     Set to coder->progress_in plus progress_in
+///                             of each initialized worker thread
+/// \param      progress_out    Set to coder->progress_out plus progress_out
+///                             of each initialized worker thread
+static void
+stream_decoder_mt_get_progress(void *coder_ptr,
+		uint64_t *progress_in, uint64_t *progress_out)
+{
+	struct lzma_stream_coder *const self = coder_ptr;
+
+	// Holding self->mutex keeps finishing threads from moving their
+	// progress info from the worker_thread structure to
+	// lzma_stream_coder while the totals are being computed.
+	mythread_sync(self->mutex) {
+		*progress_in = self->progress_in;
+		*progress_out = self->progress_out;
+
+		size_t idx = 0;
+		while (idx < self->threads_initialized) {
+			// Per-thread counters are read under that
+			// thread's own mutex.
+			mythread_sync(self->threads[idx].mutex) {
+				*progress_in += self->threads[idx].progress_in;
+				*progress_out += self->threads[idx].progress_out;
+			}
+
+			++idx;
+		}
+	}
+}
+
+
+/// \brief      Initialize or reinitialize the multithreaded Stream decoder
+///
+/// \param      next        Coder slot to set up. If it already holds a coder
+///                         created by this function, that coder is reused.
+/// \param      allocator   Allocator for the coder and everything it owns
+/// \param      options     Threading options; threads, flags, timeout,
+///                         memlimit_threading, and memlimit_stop are read
+///
+/// \return     LZMA_OPTIONS_ERROR if options->threads is zero or greater
+///             than LZMA_THREADS_MAX, or if options->flags has bits outside
+///             LZMA_SUPPORTED_FLAGS. LZMA_MEM_ERROR if allocating the coder
+///             or initializing its mutex or condition variable fails.
+///             Otherwise the result of lzma_outq_init() or
+///             stream_decoder_reset().
+static lzma_ret
+stream_decoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
+		const lzma_mt *options)
+{
+	struct lzma_stream_coder *coder;
+
+	if (options->threads == 0 || options->threads > LZMA_THREADS_MAX)
+		return LZMA_OPTIONS_ERROR;
+
+	if (options->flags & ~LZMA_SUPPORTED_FLAGS)
+		return LZMA_OPTIONS_ERROR;
+
+	lzma_next_coder_init(&stream_decoder_mt_init, next, allocator);
+
+	coder = next->coder;
+	if (!coder) {
+		coder = lzma_alloc(sizeof(struct lzma_stream_coder), allocator);
+		if (coder == NULL)
+			return LZMA_MEM_ERROR;
+
+		next->coder = coder;
+
+		// For the mutex and condition variable initializations
+		// the error handling has to be done here because
+		// stream_decoder_mt_end() doesn't know if they have
+		// already been initialized or not.
+		//
+		// next->coder must be reset after freeing: next->end isn't
+		// set yet, so whatever cleans up *next later would otherwise
+		// see a dangling pointer (and presumably free it again).
+		if (mythread_mutex_init(&coder->mutex)) {
+			lzma_free(coder, allocator);
+			next->coder = NULL;
+			return LZMA_MEM_ERROR;
+		}
+
+		if (mythread_cond_init(&coder->cond)) {
+			mythread_mutex_destroy(&coder->mutex);
+			lzma_free(coder, allocator);
+			next->coder = NULL;
+			return LZMA_MEM_ERROR;
+		}
+
+		next->code = &stream_decode_mt;
+		next->end = &stream_decoder_mt_end;
+		next->get_check = &stream_decoder_mt_get_check;
+		next->memconfig = &stream_decoder_mt_memconfig;
+		next->get_progress = &stream_decoder_mt_get_progress;
+
+		coder->filters[0].id = LZMA_VLI_UNKNOWN;
+		memzero(&coder->outq, sizeof(coder->outq));
+
+		coder->block_decoder = LZMA_NEXT_CODER_INIT;
+		coder->mem_direct_mode = 0;
+
+		coder->index_hash = NULL;
+		coder->threads = NULL;
+		coder->threads_free = NULL;
+		coder->threads_initialized = 0;
+	}
+
+	// Cleanup old filter chain if one remains after unfinished decoding
+	// of a previous Stream.
+	lzma_filters_free(coder->filters, allocator);
+
+	// By allocating threads from scratch we can start memory-usage
+	// accounting from scratch, too. Changes in filter and block sizes may
+	// affect number of threads.
+	//
+	// FIXME? Reusing should be easy but unlike the single-threaded
+	// decoder, with some types of input file combinations reusing
+	// could leave quite a lot of memory allocated but unused (first
+	// file could allocate a lot, the next files could use fewer
+	// threads and some of the allocations from the first file would not
+	// get freed unless memlimit_threading forces us to clear caches).
+	//
+	// NOTE: The direct mode decoder isn't freed here if one exists.
+	// It will be reused or freed as needed in the main loop.
+	threads_end(coder, allocator);
+
+	// All memusage counters start at 0 (including mem_direct_mode).
+	// The little extra that is needed for the structs in this file
+	// get accounted well enough by the filter chain memory usage
+	// which adds LZMA_MEMUSAGE_BASE for each chain. However,
+	// stream_decoder_mt_memconfig() has to handle this specially so that
+	// it will never return less than LZMA_MEMUSAGE_BASE as memory usage.
+	coder->mem_in_use = 0;
+	coder->mem_cached = 0;
+	coder->mem_next_block = 0;
+
+	coder->progress_in = 0;
+	coder->progress_out = 0;
+
+	coder->sequence = SEQ_STREAM_HEADER;
+	coder->thread_error = LZMA_OK;
+	coder->pending_error = LZMA_OK;
+	coder->thr = NULL;
+
+	coder->timeout = options->timeout;
+
+	// Both limits are forced to be at least 1, and the threading limit
+	// is never allowed to exceed the hard stop limit.
+	coder->memlimit_threading = my_max(1, options->memlimit_threading);
+	coder->memlimit_stop = my_max(1, options->memlimit_stop);
+	if (coder->memlimit_threading > coder->memlimit_stop)
+		coder->memlimit_threading = coder->memlimit_stop;
+
+	coder->tell_no_check = (options->flags & LZMA_TELL_NO_CHECK) != 0;
+	coder->tell_unsupported_check
+			= (options->flags & LZMA_TELL_UNSUPPORTED_CHECK) != 0;
+	coder->tell_any_check = (options->flags & LZMA_TELL_ANY_CHECK) != 0;
+	coder->ignore_check = (options->flags & LZMA_IGNORE_CHECK) != 0;
+	coder->concatenated = (options->flags & LZMA_CONCATENATED) != 0;
+	coder->fail_fast = (options->flags & LZMA_FAIL_FAST) != 0;
+
+	coder->first_stream = true;
+	coder->out_was_filled = false;
+	coder->pos = 0;
+
+	coder->threads_max = options->threads;
+
+	return_if_error(lzma_outq_init(&coder->outq, allocator,
+			coder->threads_max));
+
+	return stream_decoder_reset(coder, allocator);
+}
+
+
+/// \brief      Public entry point: set up a multithreaded .xz Stream decoder
+///
+/// \param      strm        Stream to initialize
+/// \param      options     Threading options passed on unchanged to
+///                         stream_decoder_mt_init()
+///
+/// \return     LZMA_OK once the coder is initialized and the supported
+///             actions are marked. NOTE(review): lzma_next_strm_init() is
+///             a macro defined elsewhere; presumably it returns the error
+///             code itself when initialization fails -- confirm.
+extern LZMA_API(lzma_ret)
+lzma_stream_decoder_mt(lzma_stream *strm, const lzma_mt *options)
+{
+	lzma_next_strm_init(stream_decoder_mt_init, strm, options);
+
+	// The actions that this decoder enables for lzma_code().
+	static const lzma_action accepted[] = {
+		LZMA_RUN,
+		LZMA_FINISH,
+	};
+
+	for (size_t i = 0; i < sizeof(accepted) / sizeof(accepted[0]); ++i)
+		strm->internal->supported_actions[accepted[i]] = true;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/stream_encoder.c b/src/liblzma/common/stream_encoder.c
new file mode 100644
index 0000000..ee92046
--- /dev/null
+++ b/src/liblzma/common/stream_encoder.c
@@ -0,0 +1,355 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_encoder.c
+/// \brief Encodes .xz Streams
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "block_encoder.h"
+#include "index_encoder.h"
+
+
+typedef struct {
+ enum {
+ SEQ_STREAM_HEADER, // Copying the Stream Header from buffer[] to the output
+ SEQ_BLOCK_INIT, // Deciding whether to start a new Block or the Index
+ SEQ_BLOCK_HEADER, // Copying the Block Header from buffer[] to the output
+ SEQ_BLOCK_ENCODE, // Running the Block encoder
+ SEQ_INDEX_ENCODE, // Running the Index encoder
+ SEQ_STREAM_FOOTER, // Copying the Stream Footer from buffer[] to the output
+ } sequence; // Current state; stream_encode() advances with ++sequence, so the order above matters
+
+ /// True if Block encoder has been initialized by
+ /// stream_encoder_init() or stream_encoder_update()
+ /// and thus doesn't need to be initialized in stream_encode().
+ bool block_encoder_is_initialized;
+
+ /// Block encoder
+ lzma_next_coder block_encoder;
+
+ /// Options for the Block encoder
+ lzma_block block_options;
+
+ /// The filter chain currently in use
+ lzma_filter filters[LZMA_FILTERS_MAX + 1];
+
+ /// Index encoder. This is separate from Block encoder, because this
+ /// doesn't take much memory, and when encoding multiple Streams
+ /// with the same encoding options we avoid reallocating memory.
+ lzma_next_coder index_encoder;
+
+ /// Index to hold sizes of the Blocks
+ lzma_index *index;
+
+ /// Read position in buffer[]
+ size_t buffer_pos;
+
+ /// Total number of bytes in buffer[]
+ size_t buffer_size;
+
+ /// Buffer to hold Stream Header, Block Header, and Stream Footer.
+ /// Block Header has biggest maximum size.
+ uint8_t buffer[LZMA_BLOCK_HEADER_SIZE_MAX];
+} lzma_stream_coder;
+
+
+/// \brief      (Re)initialize the Block encoder from coder->block_options
+///
+/// \param      coder       Stream encoder whose block_options (filters,
+///                         check, version) have already been filled in
+/// \param      allocator   Allocator for the Block encoder
+///
+/// \return     The error from lzma_block_header_size() if it fails,
+///             otherwise the result of lzma_block_encoder_init().
+static lzma_ret
+block_encoder_init(lzma_stream_coder *coder, const lzma_allocator *allocator)
+{
+	lzma_block *const opts = &coder->block_options;
+
+	// The Block encoder itself doesn't need compressed_size,
+	// uncompressed_size, or header_size. They are set up anyway
+	// because calculating the header size catches Filter IDs that
+	// cannot be used in Blocks/Streams.
+	opts->compressed_size = LZMA_VLI_UNKNOWN;
+	opts->uncompressed_size = LZMA_VLI_UNKNOWN;
+
+	return_if_error(lzma_block_header_size(opts));
+
+	// Now the actual Block encoder.
+	return lzma_block_encoder_init(&coder->block_encoder, allocator,
+			opts);
+}
+
+
+static lzma_ret
+stream_encode(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_stream_coder *coder = coder_ptr;
+ // Resumable state machine producing one .xz Stream: Stream Header, Blocks, Index, Stream Footer.
+ // Main loop: keep switching states for as long as there is room in the output buffer.
+ while (*out_pos < out_size)
+ switch (coder->sequence) {
+ case SEQ_STREAM_HEADER:
+ case SEQ_BLOCK_HEADER:
+ case SEQ_STREAM_FOOTER:
+ lzma_bufcpy(coder->buffer, &coder->buffer_pos,
+ coder->buffer_size, out, out_pos, out_size);
+ if (coder->buffer_pos < coder->buffer_size)
+ return LZMA_OK;
+
+ if (coder->sequence == SEQ_STREAM_FOOTER)
+ return LZMA_STREAM_END;
+
+ coder->buffer_pos = 0;
+ ++coder->sequence; // Enum order: STREAM_HEADER -> BLOCK_INIT, BLOCK_HEADER -> BLOCK_ENCODE
+ break;
+
+ case SEQ_BLOCK_INIT: {
+ if (*in_pos == in_size) {
+ // No input and no Block in progress. With LZMA_RUN there is
+ // nothing to do yet (LZMA_OK); with a flush action there is
+ // nothing to flush, so LZMA_STREAM_END is returned at once.
+ if (action != LZMA_FINISH)
+ return action == LZMA_RUN
+ ? LZMA_OK : LZMA_STREAM_END;
+
+ // The application had used LZMA_FULL_FLUSH to finish
+ // the previous Block, but now wants to finish without
+ // encoding new data, or it is simply creating an
+ // empty Stream with no Blocks.
+ //
+ // Initialize the Index encoder, and continue to
+ // actually encoding the Index.
+ return_if_error(lzma_index_encoder_init(
+ &coder->index_encoder, allocator,
+ coder->index));
+ coder->sequence = SEQ_INDEX_ENCODE;
+ break;
+ }
+
+ // Initialize the Block encoder unless it was already
+ // initialized by stream_encoder_init() or
+ // stream_encoder_update().
+ if (!coder->block_encoder_is_initialized)
+ return_if_error(block_encoder_init(coder, allocator));
+
+ // Make it false so that we don't skip the initialization
+ // with the next Block.
+ coder->block_encoder_is_initialized = false;
+
+ // Encode the Block Header. This shouldn't fail since we have
+ // already initialized the Block encoder.
+ if (lzma_block_header_encode(&coder->block_options,
+ coder->buffer) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ coder->buffer_size = coder->block_options.header_size;
+ coder->sequence = SEQ_BLOCK_HEADER;
+ break;
+ }
+
+ case SEQ_BLOCK_ENCODE: {
+ static const lzma_action convert[LZMA_ACTION_MAX + 1] = { // Indexed by the Stream-level action; gives the action passed to the Block encoder
+ LZMA_RUN,
+ LZMA_SYNC_FLUSH,
+ LZMA_FINISH,
+ LZMA_FINISH,
+ LZMA_FINISH,
+ };
+
+ const lzma_ret ret = coder->block_encoder.code(
+ coder->block_encoder.coder, allocator,
+ in, in_pos, in_size,
+ out, out_pos, out_size, convert[action]);
+ if (ret != LZMA_STREAM_END || action == LZMA_SYNC_FLUSH) // A sync flush doesn't end the Block: pass the result through as is
+ return ret;
+
+ // Add a new Index Record.
+ const lzma_vli unpadded_size = lzma_block_unpadded_size(
+ &coder->block_options);
+ assert(unpadded_size != 0);
+ return_if_error(lzma_index_append(coder->index, allocator,
+ unpadded_size,
+ coder->block_options.uncompressed_size));
+
+ coder->sequence = SEQ_BLOCK_INIT;
+ break;
+ }
+
+ case SEQ_INDEX_ENCODE: {
+ // Call the Index encoder. It doesn't take any input, so
+ // those pointers can be NULL.
+ const lzma_ret ret = coder->index_encoder.code(
+ coder->index_encoder.coder, allocator,
+ NULL, NULL, 0,
+ out, out_pos, out_size, LZMA_RUN);
+ if (ret != LZMA_STREAM_END)
+ return ret;
+
+ // Encode the Stream Footer into coder->buffer.
+ const lzma_stream_flags stream_flags = {
+ .version = 0,
+ .backward_size = lzma_index_size(coder->index),
+ .check = coder->block_options.check,
+ };
+
+ if (lzma_stream_footer_encode(&stream_flags, coder->buffer)
+ != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ coder->buffer_size = LZMA_STREAM_HEADER_SIZE;
+ coder->sequence = SEQ_STREAM_FOOTER;
+ break;
+ }
+
+ default:
+ assert(0);
+ return LZMA_PROG_ERROR;
+ }
+
+ return LZMA_OK; // The output buffer became full before a state returned
+}
+
+
+/// \brief      Free the Stream encoder and everything it owns
+///
+/// \param      coder_ptr   The lzma_stream_coder to destroy
+/// \param      allocator   Allocator handed to every end/free call
+static void
+stream_encoder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+	lzma_stream_coder *const self = coder_ptr;
+
+	// The nested coders and the Index.
+	lzma_next_end(&self->block_encoder, allocator);
+	lzma_next_end(&self->index_encoder, allocator);
+	lzma_index_end(self->index, allocator);
+
+	// Options of the filter chain currently in use.
+	lzma_filters_free(self->filters, allocator);
+
+	// And the coder structure itself.
+	lzma_free(self, allocator);
+}
+
+
+/// \brief      Change the filter chain of the Stream encoder
+///
+/// What is allowed depends on the current state:
+///   - Up to SEQ_BLOCK_INIT no Block is unfinished, so the whole chain may
+///     change. The Block encoder is initialized with the new chain right
+///     away, which also validates the chain.
+///   - Up to SEQ_BLOCK_ENCODE a Block is in progress and only the
+///     filter-specific options are updated via block_encoder.update().
+///   - After that (Index or Stream Footer) the call is an error.
+///
+/// \param      coder_ptr           The lzma_stream_coder
+/// \param      allocator           Allocator for the copied filter options
+/// \param      filters             The new filter chain
+/// \param      reversed_filters    Passed on to block_encoder.update() only
+///
+/// \return     LZMA_OK on success, in which case coder->filters holds a copy
+///             of the new chain. On any error coder->filters is left as it
+///             was. LZMA_PROG_ERROR is returned if the Index or the Stream
+///             Footer is already being encoded.
+static lzma_ret
+stream_encoder_update(void *coder_ptr, const lzma_allocator *allocator,
+		const lzma_filter *filters,
+		const lzma_filter *reversed_filters)
+{
+	lzma_stream_coder *const self = coder_ptr;
+
+	// Work on a temporary copy so that the encoder state is easy to
+	// keep unchanged if lzma_filters_copy() fails.
+	lzma_filter new_chain[LZMA_FILTERS_MAX + 1];
+	return_if_error(lzma_filters_copy(filters, new_chain, allocator));
+
+	lzma_ret ret;
+
+	if (self->sequence <= SEQ_BLOCK_INIT) {
+		// block_options.filters points to the temporary copy only
+		// for the duration of the initialization.
+		self->block_encoder_is_initialized = false;
+		self->block_options.filters = new_chain;
+		ret = block_encoder_init(self, allocator);
+		self->block_options.filters = self->filters;
+
+		if (ret == LZMA_OK)
+			self->block_encoder_is_initialized = true;
+
+	} else if (self->sequence <= SEQ_BLOCK_ENCODE) {
+		// In the middle of a Block.
+		ret = self->block_encoder.update(
+				self->block_encoder.coder, allocator,
+				filters, reversed_filters);
+	} else {
+		// Already encoding the Index or the Stream Footer.
+		ret = LZMA_PROG_ERROR;
+	}
+
+	if (ret != LZMA_OK) {
+		lzma_filters_free(new_chain, allocator);
+		return ret;
+	}
+
+	// Replace the old chain: free its options and move the copy in.
+	lzma_filters_free(self->filters, allocator);
+	memcpy(self->filters, new_chain, sizeof(new_chain));
+
+	return LZMA_OK;
+}
+
+
+/// \brief      Initialize or reinitialize the single-threaded Stream encoder
+///
+/// \param      next        Coder slot to set up; an existing coder in the
+///                         slot is reused
+/// \param      allocator   Allocator for the coder, the Index, and filters
+/// \param      filters     Filter chain for the first Block; must not be NULL
+/// \param      check       Check ID stored in the Stream Header and used
+///                         for the Blocks
+///
+/// \return     LZMA_PROG_ERROR if filters is NULL, LZMA_MEM_ERROR if the
+///             coder or the Index cannot be allocated, an error from
+///             lzma_stream_header_encode(), or otherwise the result of
+///             stream_encoder_update().
+static lzma_ret
+stream_encoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+		const lzma_filter *filters, lzma_check check)
+{
+	lzma_next_coder_init(&stream_encoder_init, next, allocator);
+
+	if (filters == NULL)
+		return LZMA_PROG_ERROR;
+
+	lzma_stream_coder *self = next->coder;
+
+	if (self == NULL) {
+		// First use of this slot: allocate the coder and hook up
+		// the callbacks.
+		self = lzma_alloc(sizeof(*self), allocator);
+		if (self == NULL)
+			return LZMA_MEM_ERROR;
+
+		next->coder = self;
+		next->code = &stream_encode;
+		next->end = &stream_encoder_end;
+		next->update = &stream_encoder_update;
+
+		// Nothing is allocated inside the new coder yet.
+		self->index = NULL;
+		self->index_encoder = LZMA_NEXT_CODER_INIT;
+		self->block_encoder = LZMA_NEXT_CODER_INIT;
+		self->filters[0].id = LZMA_VLI_UNKNOWN;
+	}
+
+	// State that is reset for every new Stream.
+	self->block_options.check = check;
+	self->block_options.version = 0;
+	self->sequence = SEQ_STREAM_HEADER;
+
+	// Start from an empty Index, dropping any old one.
+	lzma_index_end(self->index, allocator);
+	self->index = lzma_index_init(allocator);
+	if (self->index == NULL)
+		return LZMA_MEM_ERROR;
+
+	// Put the encoded Stream Header into the buffer; stream_encode()
+	// copies it to the output.
+	lzma_stream_flags header_flags = {
+		.version = 0,
+		.check = check,
+	};
+	return_if_error(lzma_stream_header_encode(
+			&header_flags, self->buffer));
+
+	self->buffer_pos = 0;
+	self->buffer_size = LZMA_STREAM_HEADER_SIZE;
+
+	// Initializing the Block encoder already here makes unsupported
+	// filter chains fail at initialization time instead of after the
+	// Stream Header has been written out.
+	return stream_encoder_update(self, allocator, filters, NULL);
+}
+
+
+/// \brief      Public entry point: set up a single-threaded .xz Stream encoder
+///
+/// \param      strm        Stream to initialize
+/// \param      filters     Filter chain passed on to stream_encoder_init()
+/// \param      check       Check ID passed on to stream_encoder_init()
+///
+/// \return     LZMA_OK once the coder is initialized and the supported
+///             actions are marked. NOTE(review): lzma_next_strm_init() is
+///             a macro defined elsewhere; presumably it returns the error
+///             code itself when initialization fails -- confirm.
+extern LZMA_API(lzma_ret)
+lzma_stream_encoder(lzma_stream *strm,
+		const lzma_filter *filters, lzma_check check)
+{
+	lzma_next_strm_init(stream_encoder_init, strm, filters, check);
+
+	// The actions that this encoder enables for lzma_code().
+	static const lzma_action accepted[] = {
+		LZMA_RUN,
+		LZMA_SYNC_FLUSH,
+		LZMA_FULL_FLUSH,
+		LZMA_FULL_BARRIER,
+		LZMA_FINISH,
+	};
+
+	for (size_t i = 0; i < sizeof(accepted) / sizeof(accepted[0]); ++i)
+		strm->internal->supported_actions[accepted[i]] = true;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/stream_encoder_mt.c b/src/liblzma/common/stream_encoder_mt.c
new file mode 100644
index 0000000..f64de9b
--- /dev/null
+++ b/src/liblzma/common/stream_encoder_mt.c
@@ -0,0 +1,1283 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_encoder_mt.c
+/// \brief Multithreaded .xz Stream encoder
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_encoder.h"
+#include "easy_preset.h"
+#include "block_encoder.h"
+#include "block_buffer_encoder.h"
+#include "index_encoder.h"
+#include "outqueue.h"
+
+
+/// Maximum supported block size. This makes it simpler to prevent integer
+/// overflows if we are given an unusually large block size.
+#define BLOCK_SIZE_MAX (UINT64_MAX / LZMA_THREADS_MAX)
+
+
+typedef enum { // Order matters: worker_encode() tests "state >= THR_STOP"
+ /// Waiting for work.
+ THR_IDLE,
+
+ /// Encoding is in progress.
+ THR_RUN,
+
+ /// Encoding is in progress but no more input data will
+ /// be read.
+ THR_FINISH,
+
+ /// The main thread wants the thread to stop whatever it was doing
+ /// but not exit.
+ THR_STOP,
+
+ /// The main thread wants the thread to exit. Thread cancellation could be
+ /// used instead, but since stopping is supported anyway, this is lazier.
+ THR_EXIT,
+
+} worker_state;
+
+typedef struct lzma_stream_coder_s lzma_stream_coder;
+
+typedef struct worker_thread_s worker_thread;
+struct worker_thread_s {
+ worker_state state; // Read by worker_encode() while holding mutex
+
+ /// Input buffer of coder->block_size bytes. The main thread will
+ /// put new input into this and update in_size accordingly. Once
+ /// no more input is coming, state will be set to THR_FINISH.
+ uint8_t *in;
+
+ /// Amount of data available in the input buffer. This is modified
+ /// only by the main thread.
+ size_t in_size;
+
+ /// Output buffer for this thread. This is set by the main
+ /// thread every time a new Block is started with this thread
+ /// structure.
+ lzma_outbuf *outbuf;
+
+ /// Pointer to the main structure is needed when putting this
+ /// thread back to the stack of free threads.
+ lzma_stream_coder *coder;
+
+ /// The allocator is set by the main thread. Since a copy of the
+ /// pointer is kept here, the application must not change the
+ /// allocator before calling lzma_end().
+ const lzma_allocator *allocator;
+
+ /// Amount of uncompressed data that has already been compressed.
+ uint64_t progress_in;
+
+ /// Amount of compressed data that is ready.
+ uint64_t progress_out;
+
+ /// Block encoder
+ lzma_next_coder block_encoder;
+
+ /// Compression options for this Block
+ lzma_block block_options;
+
+ /// Filter chain for this thread. By copying the filters array
+ /// to each thread it is possible to change the filter chain
+ /// between Blocks using lzma_filters_update().
+ lzma_filter filters[LZMA_FILTERS_MAX + 1];
+
+ /// Next structure in the stack of free worker threads.
+ worker_thread *next;
+
+ mythread_mutex mutex; // Held by worker_encode() while it stores progress_in/progress_out and reads state and in_size
+ mythread_cond cond; // worker_encode() waits on this until in_size or state changes
+
+ /// The ID of this thread is used to join the thread
+ /// when it's not needed anymore.
+ mythread thread_id;
+};
+
+
+struct lzma_stream_coder_s {
+ enum {
+ SEQ_STREAM_HEADER,
+ SEQ_BLOCK,
+ SEQ_INDEX,
+ SEQ_STREAM_FOOTER,
+ } sequence; // Encoder state; presumably the part of the Stream currently being produced -- confirm in the main loop
+
+ /// Start a new Block every block_size bytes of input unless
+ /// LZMA_FULL_FLUSH or LZMA_FULL_BARRIER is used earlier.
+ size_t block_size;
+
+ /// The filter chain to use for the next Block.
+ /// This can be updated using lzma_filters_update()
+ /// after LZMA_FULL_BARRIER or LZMA_FULL_FLUSH.
+ lzma_filter filters[LZMA_FILTERS_MAX + 1];
+
+ /// A copy of filters[] will be put here when attempting to get
+ /// a new worker thread. This will be copied to a worker thread
+ /// when a thread becomes free and then this cache is marked as
+ /// empty by setting [0].id = LZMA_VLI_UNKNOWN. Without this cache
+ /// the filter options from filters[] would get uselessly copied
+ /// multiple times (allocated and freed) when waiting for a new free
+ /// worker thread.
+ ///
+ /// This is freed if filters[] is updated via lzma_filters_update().
+ lzma_filter filters_cache[LZMA_FILTERS_MAX + 1];
+
+
+ /// Index to hold sizes of the Blocks
+ lzma_index *index;
+
+ /// Index encoder
+ lzma_next_coder index_encoder;
+
+
+ /// Stream Flags for encoding the Stream Header and Stream Footer.
+ lzma_stream_flags stream_flags;
+
+ /// Buffer to hold Stream Header and Stream Footer.
+ uint8_t header[LZMA_STREAM_HEADER_SIZE];
+
+ /// Read position in header[]
+ size_t header_pos;
+
+
+ /// Output buffer queue for compressed data
+ lzma_outq outq;
+
+ /// How much memory to allocate for each lzma_outbuf.buf
+ size_t outbuf_alloc_size;
+
+
+ /// Maximum wait time if cannot use all the input and cannot
+ /// fill the output buffer. This is in milliseconds.
+ uint32_t timeout;
+
+
+ /// Error code from a worker thread. worker_error() stores only the first error, i.e. while this is still LZMA_OK.
+ lzma_ret thread_error;
+
+ /// Array of allocated thread-specific structures
+ worker_thread *threads;
+
+ /// Number of structures in "threads" above. This is also the
+ /// number of threads that will be created at maximum.
+ uint32_t threads_max;
+
+ /// Number of thread structures that have been initialized, and
+ /// thus the number of worker threads actually created so far.
+ uint32_t threads_initialized;
+
+ /// Stack of free threads. When a thread finishes, it puts itself
+ /// back into this stack. This starts as empty because threads
+ /// are created only when actually needed.
+ worker_thread *threads_free;
+
+ /// The most recent worker thread to which the main thread writes
+ /// the new input from the application.
+ worker_thread *thr;
+
+
+ /// Amount of uncompressed data in Blocks that have already
+ /// been finished.
+ uint64_t progress_in;
+
+ /// Amount of compressed data in Stream Header + Blocks that
+ /// have already been finished.
+ uint64_t progress_out;
+
+
+ mythread_mutex mutex; // Held by worker_error() while it updates thread_error
+ mythread_cond cond; // Signaled by worker_error() after it has looked at thread_error
+};
+
+
+/// \brief      Tell the main thread that a worker thread has failed
+///
+/// Only the first reported error is kept: thread_error is overwritten only
+/// while it is still LZMA_OK. The coder's condition variable is signaled
+/// in either case.
+///
+/// \param      thr     The worker thread reporting the error
+/// \param      ret     The error; must be neither LZMA_OK nor LZMA_STREAM_END
+static void
+worker_error(worker_thread *thr, lzma_ret ret)
+{
+	assert(ret != LZMA_OK);
+	assert(ret != LZMA_STREAM_END);
+
+	lzma_stream_coder *const main_coder = thr->coder;
+
+	mythread_sync(main_coder->mutex) {
+		if (main_coder->thread_error == LZMA_OK)
+			main_coder->thread_error = ret;
+
+		mythread_cond_signal(&main_coder->cond);
+	}
+}
+
+/// Encode one Block in a worker thread; returns the thread's next state.
+static worker_state
+worker_encode(worker_thread *thr, size_t *out_pos, worker_state state)
+{
+ assert(thr->progress_in == 0);
+ assert(thr->progress_out == 0);
+
+ // Set the Block options.
+ thr->block_options = (lzma_block){
+ .version = 0,
+ .check = thr->coder->stream_flags.check,
+ .compressed_size = thr->outbuf->allocated,
+ .uncompressed_size = thr->coder->block_size,
+ .filters = thr->filters,
+ };
+
+ // Calculate maximum size of the Block Header. This amount is
+ // reserved in the beginning of the buffer so that Block Header
+ // along with Compressed Size and Uncompressed Size can be
+ // written there.
+ lzma_ret ret = lzma_block_header_size(&thr->block_options);
+ if (ret != LZMA_OK) {
+ worker_error(thr, ret);
+ return THR_STOP;
+ }
+
+ // Initialize the Block encoder.
+ ret = lzma_block_encoder_init(&thr->block_encoder,
+ thr->allocator, &thr->block_options);
+ if (ret != LZMA_OK) {
+ worker_error(thr, ret);
+ return THR_STOP;
+ }
+
+ size_t in_pos = 0;
+ size_t in_size = 0;
+
+ *out_pos = thr->block_options.header_size;
+ const size_t out_size = thr->outbuf->allocated;
+
+ do {
+ mythread_sync(thr->mutex) {
+ // Store in_pos and *out_pos into *thr so that
+ // an application may read them via
+ // lzma_get_progress() to get progress information.
+ //
+ // NOTE: These aren't updated when the encoding
+ // finishes. Instead, the final values are taken
+ // later from thr->outbuf.
+ thr->progress_in = in_pos;
+ thr->progress_out = *out_pos;
+
+ while (in_size == thr->in_size
+ && thr->state == THR_RUN)
+ mythread_cond_wait(&thr->cond, &thr->mutex);
+
+ state = thr->state;
+ in_size = thr->in_size;
+ }
+
+ // Return if we were asked to stop or exit.
+ if (state >= THR_STOP)
+ return state;
+
+ lzma_action action = state == THR_FINISH
+ ? LZMA_FINISH : LZMA_RUN;
+
+ // Limit the amount of input given to the Block encoder
+ // at once. This way this thread can react fairly quickly
+ // if the main thread wants us to stop or exit.
+ static const size_t in_chunk_max = 16384;
+ size_t in_limit = in_size;
+ if (in_size - in_pos > in_chunk_max) {
+ in_limit = in_pos + in_chunk_max;
+ action = LZMA_RUN; // Input remains after this chunk; don't finish yet.
+ }
+
+ ret = thr->block_encoder.code(
+ thr->block_encoder.coder, thr->allocator,
+ thr->in, &in_pos, in_limit, thr->outbuf->buf,
+ out_pos, out_size, action);
+ } while (ret == LZMA_OK && *out_pos < out_size); // Stop on error, end, or full output.
+
+ switch (ret) {
+ case LZMA_STREAM_END:
+ assert(state == THR_FINISH);
+
+ // Encode the Block Header. By doing it after
+ // the compression, we can store the Compressed Size
+ // and Uncompressed Size fields.
+ ret = lzma_block_header_encode(&thr->block_options,
+ thr->outbuf->buf);
+ if (ret != LZMA_OK) {
+ worker_error(thr, ret);
+ return THR_STOP;
+ }
+
+ break;
+
+ case LZMA_OK: // The loop ended because the output buffer became full.
+ // The data was incompressible. Encode it using uncompressed
+ // LZMA2 chunks.
+ //
+ // First wait until we have gotten all the input.
+ mythread_sync(thr->mutex) {
+ while (thr->state == THR_RUN)
+ mythread_cond_wait(&thr->cond, &thr->mutex);
+
+ state = thr->state;
+ in_size = thr->in_size;
+ }
+
+ if (state >= THR_STOP)
+ return state;
+
+ // Do the encoding. This takes care of the Block Header too.
+ *out_pos = 0; // Start over at the beginning of the output buffer.
+ ret = lzma_block_uncomp_encode(&thr->block_options,
+ thr->in, in_size, thr->outbuf->buf,
+ out_pos, out_size);
+
+ // It shouldn't fail.
+ if (ret != LZMA_OK) {
+ worker_error(thr, LZMA_PROG_ERROR);
+ return THR_STOP;
+ }
+
+ break;
+
+ default:
+ worker_error(thr, ret);
+ return THR_STOP;
+ }
+
+ // Set the size information that will be read by the main thread
+ // to write the Index field.
+ thr->outbuf->unpadded_size
+ = lzma_block_unpadded_size(&thr->block_options);
+ assert(thr->outbuf->unpadded_size != 0);
+ thr->outbuf->uncompressed_size = thr->block_options.uncompressed_size;
+
+ return THR_FINISH; // Success; worker_start() then marks the outbuf finished.
+}
+
+/// Worker thread entry point: wait for work, encode Blocks, exit on THR_EXIT.
+static MYTHREAD_RET_TYPE
+worker_start(void *thr_ptr)
+{
+ worker_thread *thr = thr_ptr;
+ worker_state state = THR_IDLE; // Init to silence a warning
+
+ while (true) {
+ // Wait for work.
+ mythread_sync(thr->mutex) {
+ while (true) {
+ // The thread is already idle so if we are
+ // requested to stop, just set the state.
+ if (thr->state == THR_STOP) {
+ thr->state = THR_IDLE;
+ mythread_cond_signal(&thr->cond);
+ }
+
+ state = thr->state;
+ if (state != THR_IDLE)
+ break;
+
+ mythread_cond_wait(&thr->cond, &thr->mutex);
+ }
+ }
+
+ size_t out_pos = 0;
+
+ assert(state != THR_IDLE);
+ assert(state != THR_STOP);
+
+ if (state <= THR_FINISH) // NOTE(review): presumably THR_RUN or THR_FINISH; enum order is not visible here.
+ state = worker_encode(thr, &out_pos, state);
+
+ if (state == THR_EXIT)
+ break;
+
+ // Mark the thread as idle unless the main thread has
+ // told us to exit. Signal is needed for the case
+ // where the main thread is waiting for the threads to stop.
+ mythread_sync(thr->mutex) {
+ if (thr->state != THR_EXIT) {
+ thr->state = THR_IDLE;
+ mythread_cond_signal(&thr->cond);
+ }
+ }
+
+ mythread_sync(thr->coder->mutex) {
+ // If no errors occurred, make the encoded data
+ // available to be copied out.
+ if (state == THR_FINISH) {
+ thr->outbuf->pos = out_pos;
+ thr->outbuf->finished = true;
+ }
+
+ // Update the main progress info (done even if state != THR_FINISH).
+ thr->coder->progress_in
+ += thr->outbuf->uncompressed_size;
+ thr->coder->progress_out += out_pos;
+ thr->progress_in = 0;
+ thr->progress_out = 0;
+
+ // Return this thread to the stack of free threads.
+ thr->next = thr->coder->threads_free;
+ thr->coder->threads_free = thr;
+
+ mythread_cond_signal(&thr->coder->cond);
+ }
+ }
+
+ // Exiting, free the resources.
+ lzma_filters_free(thr->filters, thr->allocator);
+
+ mythread_mutex_destroy(&thr->mutex);
+ mythread_cond_destroy(&thr->cond);
+
+ lzma_next_end(&thr->block_encoder, thr->allocator);
+ lzma_free(thr->in, thr->allocator);
+ return MYTHREAD_RET_VALUE;
+}
+
+
+/// Make the threads stop but not exit. Optionally wait for them to stop.
+static void
+threads_stop(lzma_stream_coder *coder, bool wait_for_threads)
+{
+ // Tell the threads to stop.
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ coder->threads[i].state = THR_STOP;
+ mythread_cond_signal(&coder->threads[i].cond);
+ }
+ }
+
+ if (!wait_for_threads)
+ return;
+
+ // Wait for the threads to settle in the idle state.
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ while (coder->threads[i].state != THR_IDLE) // worker_start() signals cond when it sets THR_IDLE.
+ mythread_cond_wait(&coder->threads[i].cond,
+ &coder->threads[i].mutex);
+ }
+ }
+
+ return;
+}
+
+
+/// Stop the threads and free the resources associated with them.
+/// Wait until the threads have exited.
+static void
+threads_end(lzma_stream_coder *coder, const lzma_allocator *allocator)
+{
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ coder->threads[i].state = THR_EXIT;
+ mythread_cond_signal(&coder->threads[i].cond);
+ }
+ }
+
+ for (uint32_t i = 0; i < coder->threads_initialized; ++i) {
+ int ret = mythread_join(coder->threads[i].thread_id);
+ assert(ret == 0);
+ (void)ret; // ret is otherwise unused when assert() is compiled out.
+ }
+
+ lzma_free(coder->threads, allocator); // Per-thread resources are freed by worker_start() on exit.
+ return;
+}
+
+
+/// Initialize a new worker_thread structure and create a new thread.
+static lzma_ret
+initialize_new_thread(lzma_stream_coder *coder,
+ const lzma_allocator *allocator)
+{
+ worker_thread *thr = &coder->threads[coder->threads_initialized];
+
+ thr->in = lzma_alloc(coder->block_size, allocator);
+ if (thr->in == NULL)
+ return LZMA_MEM_ERROR;
+
+ if (mythread_mutex_init(&thr->mutex))
+ goto error_mutex;
+
+ if (mythread_cond_init(&thr->cond))
+ goto error_cond;
+
+ thr->state = THR_IDLE;
+ thr->allocator = allocator;
+ thr->coder = coder;
+ thr->progress_in = 0;
+ thr->progress_out = 0;
+ thr->block_encoder = LZMA_NEXT_CODER_INIT;
+ thr->filters[0].id = LZMA_VLI_UNKNOWN; // Empty chain; get_thread() installs the real one.
+
+ if (mythread_create(&thr->thread_id, &worker_start, thr))
+ goto error_thread;
+
+ ++coder->threads_initialized; // Counted only after the thread was created.
+ coder->thr = thr; // The new thread becomes the current one.
+
+ return LZMA_OK;
+
+error_thread:
+ mythread_cond_destroy(&thr->cond);
+
+error_cond:
+ mythread_mutex_destroy(&thr->mutex);
+
+error_mutex:
+ lzma_free(thr->in, allocator);
+ return LZMA_MEM_ERROR; // Every failure in this function is reported as LZMA_MEM_ERROR.
+}
+
+/// Try to assign a worker thread (reused or new) and an outbuf to coder->thr.
+static lzma_ret
+get_thread(lzma_stream_coder *coder, const lzma_allocator *allocator)
+{
+ // If there are no free output subqueues, there is no
+ // point to try getting a thread.
+ if (!lzma_outq_has_buf(&coder->outq))
+ return LZMA_OK; // coder->thr is left unchanged.
+
+ // That's also true if we cannot allocate memory for the output
+ // buffer in the output queue.
+ return_if_error(lzma_outq_prealloc_buf(&coder->outq, allocator,
+ coder->outbuf_alloc_size));
+
+ // Make a thread-specific copy of the filter chain. Put it in
+ // the cache array first so that if we cannot get a new thread yet,
+ // the allocation is ready when we try again.
+ if (coder->filters_cache[0].id == LZMA_VLI_UNKNOWN)
+ return_if_error(lzma_filters_copy(
+ coder->filters, coder->filters_cache, allocator));
+
+ // If there is a free structure on the stack, use it.
+ mythread_sync(coder->mutex) {
+ if (coder->threads_free != NULL) {
+ coder->thr = coder->threads_free;
+ coder->threads_free = coder->threads_free->next;
+ }
+ }
+
+ if (coder->thr == NULL) {
+ // If there are no uninitialized structures left, return.
+ if (coder->threads_initialized == coder->threads_max)
+ return LZMA_OK;
+
+ // Initialize a new thread.
+ return_if_error(initialize_new_thread(coder, allocator));
+ }
+
+ // Reset the parts of the thread state that have to be done
+ // in the main thread.
+ mythread_sync(coder->thr->mutex) {
+ coder->thr->state = THR_RUN;
+ coder->thr->in_size = 0;
+ coder->thr->outbuf = lzma_outq_get_buf(&coder->outq, NULL);
+
+ // Free the old thread-specific filter options and replace
+ // them with the already-allocated new options from
+ // coder->filters_cache[]. Then mark the cache as empty.
+ lzma_filters_free(coder->thr->filters, allocator);
+ memcpy(coder->thr->filters, coder->filters_cache,
+ sizeof(coder->filters_cache));
+ coder->filters_cache[0].id = LZMA_VLI_UNKNOWN;
+
+ mythread_cond_signal(&coder->thr->cond);
+ }
+
+ return LZMA_OK;
+}
+
+/// Copy input from in[] to worker threads, getting a new thread per Block.
+static lzma_ret
+stream_encode_in(lzma_stream_coder *coder, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, lzma_action action)
+{
+ while (*in_pos < in_size
+ || (coder->thr != NULL && action != LZMA_RUN)) {
+ if (coder->thr == NULL) {
+ // Get a new thread.
+ const lzma_ret ret = get_thread(coder, allocator);
+ if (coder->thr == NULL) // No thread was assigned (ret may be LZMA_OK) or an error occurred.
+ return ret;
+ }
+
+ // Copy the input data to thread's buffer.
+ size_t thr_in_size = coder->thr->in_size;
+ lzma_bufcpy(in, in_pos, in_size, coder->thr->in,
+ &thr_in_size, coder->block_size);
+
+ // Tell the Block encoder to finish if
+ // - it has got block_size bytes of input; or
+ // - all input was used and LZMA_FINISH, LZMA_FULL_FLUSH,
+ // or LZMA_FULL_BARRIER was used.
+ //
+ // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER.
+ const bool finish = thr_in_size == coder->block_size
+ || (*in_pos == in_size && action != LZMA_RUN);
+
+ bool block_error = false;
+
+ mythread_sync(coder->thr->mutex) {
+ if (coder->thr->state == THR_IDLE) {
+ // Something has gone wrong with the Block
+ // encoder. It has set coder->thread_error
+ // which we will read a few lines later.
+ block_error = true;
+ } else {
+ // Tell the Block encoder its new amount
+ // of input and update the state if needed.
+ coder->thr->in_size = thr_in_size;
+
+ if (finish)
+ coder->thr->state = THR_FINISH;
+
+ mythread_cond_signal(&coder->thr->cond);
+ }
+ }
+
+ if (block_error) {
+ lzma_ret ret = LZMA_OK; // Init to silence a warning.
+
+ mythread_sync(coder->mutex) {
+ ret = coder->thread_error;
+ }
+
+ return ret;
+ }
+
+ if (finish)
+ coder->thr = NULL; // This Block has all its input; later input needs a new thread.
+ }
+
+ return LZMA_OK;
+}
+
+
+/// Wait until more input can be consumed, more output can be read, or
+/// an optional timeout is reached. Returns true only if the wait timed out.
+static bool
+wait_for_work(lzma_stream_coder *coder, mythread_condtime *wait_abs,
+ bool *has_blocked, bool has_input)
+{
+ if (coder->timeout != 0 && !*has_blocked) {
+ // Every time when stream_encode_mt() is called via
+ // lzma_code(), *has_blocked starts as false. We set it
+ // to true here and calculate the absolute time when
+ // we must return if there's nothing to do.
+ //
+ // This way if we block multiple times for short moments
+ // less than "timeout" milliseconds, we will return once
+ // "timeout" amount of time has passed since the *first*
+ // blocking occurred. If the absolute time was calculated
+ // again every time we block, "timeout" would effectively
+ // be meaningless if we never consecutively block longer
+ // than "timeout" ms.
+ *has_blocked = true;
+ mythread_condtime_set(wait_abs, &coder->cond, coder->timeout);
+ }
+
+ bool timed_out = false;
+
+ mythread_sync(coder->mutex) {
+ // There are four things that we wait for. If one of them
+ // becomes possible, we return.
+ // - If there is input left, we need to get a free
+ // worker thread and an output buffer for it.
+ // - Data ready to be read from the output queue.
+ // - A worker thread indicates an error.
+ // - Time out occurs.
+ while ((!has_input || coder->threads_free == NULL
+ || !lzma_outq_has_buf(&coder->outq))
+ && !lzma_outq_is_readable(&coder->outq)
+ && coder->thread_error == LZMA_OK
+ && !timed_out) {
+ if (coder->timeout != 0)
+ timed_out = mythread_cond_timedwait(
+ &coder->cond, &coder->mutex,
+ wait_abs) != 0;
+ else
+ mythread_cond_wait(&coder->cond,
+ &coder->mutex);
+ }
+ }
+
+ return timed_out;
+}
+
+/// Main coding function: Stream Header, Blocks, Index, then Stream Footer.
+static lzma_ret
+stream_encode_mt(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_stream_coder *coder = coder_ptr;
+
+ switch (coder->sequence) {
+ case SEQ_STREAM_HEADER:
+ lzma_bufcpy(coder->header, &coder->header_pos,
+ sizeof(coder->header),
+ out, out_pos, out_size);
+ if (coder->header_pos < sizeof(coder->header))
+ return LZMA_OK;
+
+ coder->header_pos = 0;
+ coder->sequence = SEQ_BLOCK;
+
+ // Fall through
+
+ case SEQ_BLOCK: {
+ // Initialized to silence warnings.
+ lzma_vli unpadded_size = 0;
+ lzma_vli uncompressed_size = 0;
+ lzma_ret ret = LZMA_OK;
+
+ // These are for wait_for_work().
+ bool has_blocked = false;
+ mythread_condtime wait_abs = { 0 };
+
+ while (true) {
+ mythread_sync(coder->mutex) {
+ // Check for Block encoder errors.
+ ret = coder->thread_error;
+ if (ret != LZMA_OK) {
+ assert(ret != LZMA_STREAM_END);
+ break; // Break out of mythread_sync.
+ }
+
+ // Try to read compressed data to out[].
+ ret = lzma_outq_read(&coder->outq, allocator,
+ out, out_pos, out_size,
+ &unpadded_size,
+ &uncompressed_size);
+ }
+
+ if (ret == LZMA_STREAM_END) {
+ // End of Block. Add it to the Index.
+ ret = lzma_index_append(coder->index,
+ allocator, unpadded_size,
+ uncompressed_size);
+ if (ret != LZMA_OK) {
+ threads_stop(coder, false);
+ return ret;
+ }
+
+ // If we didn't fill the output buffer yet,
+ // try to read more data. Maybe the next
+ // outbuf has been finished already too.
+ if (*out_pos < out_size)
+ continue;
+ }
+
+ if (ret != LZMA_OK) {
+ // coder->thread_error was set.
+ threads_stop(coder, false);
+ return ret;
+ }
+
+ // Try to give uncompressed data to a worker thread.
+ ret = stream_encode_in(coder, allocator,
+ in, in_pos, in_size, action);
+ if (ret != LZMA_OK) {
+ threads_stop(coder, false);
+ return ret;
+ }
+
+ // See if we should wait or return.
+ //
+ // TODO: LZMA_SYNC_FLUSH and LZMA_SYNC_BARRIER.
+ if (*in_pos == in_size) {
+ // LZMA_RUN: More data is probably coming
+ // so return to let the caller fill the
+ // input buffer.
+ if (action == LZMA_RUN)
+ return LZMA_OK;
+
+ // LZMA_FULL_BARRIER: The same as with
+ // LZMA_RUN but tell the caller that the
+ // barrier was completed.
+ if (action == LZMA_FULL_BARRIER)
+ return LZMA_STREAM_END;
+
+ // Finishing or flushing isn't completed until
+ // all input data has been encoded and copied
+ // to the output buffer.
+ if (lzma_outq_is_empty(&coder->outq)) {
+ // LZMA_FINISH: Continue to encode
+ // the Index field.
+ if (action == LZMA_FINISH)
+ break;
+
+ // LZMA_FULL_FLUSH: Return to tell
+ // the caller that flushing was
+ // completed.
+ if (action == LZMA_FULL_FLUSH)
+ return LZMA_STREAM_END;
+ }
+ }
+
+ // Return if there is no output space left.
+ // This check must be done after testing the input
+ // buffer, because we might want to use a different
+ // return code.
+ if (*out_pos == out_size)
+ return LZMA_OK;
+
+ // Neither in nor out has been used completely.
+ // Wait until there's something we can do.
+ if (wait_for_work(coder, &wait_abs, &has_blocked,
+ *in_pos < in_size))
+ return LZMA_TIMED_OUT;
+ }
+
+ // All Blocks have been encoded and the threads have stopped.
+ // Prepare to encode the Index field.
+ return_if_error(lzma_index_encoder_init(
+ &coder->index_encoder, allocator,
+ coder->index));
+ coder->sequence = SEQ_INDEX;
+
+ // Update the progress info to take the Index and
+ // Stream Footer into account. Those are very fast to encode
+ // so in terms of progress information they can be thought
+ // to be ready to be copied out.
+ coder->progress_out += lzma_index_size(coder->index)
+ + LZMA_STREAM_HEADER_SIZE; // NOTE(review): stands in for the Stream Footer size; presumably equal - confirm.
+ }
+
+ // Fall through
+
+ case SEQ_INDEX: {
+ // Call the Index encoder. It doesn't take any input, so
+ // those pointers can be NULL.
+ const lzma_ret ret = coder->index_encoder.code(
+ coder->index_encoder.coder, allocator,
+ NULL, NULL, 0,
+ out, out_pos, out_size, LZMA_RUN);
+ if (ret != LZMA_STREAM_END)
+ return ret;
+
+ // Encode the Stream Footer into coder->header.
+ coder->stream_flags.backward_size
+ = lzma_index_size(coder->index);
+ if (lzma_stream_footer_encode(&coder->stream_flags,
+ coder->header) != LZMA_OK)
+ return LZMA_PROG_ERROR;
+
+ coder->sequence = SEQ_STREAM_FOOTER;
+ }
+
+ // Fall through
+
+ case SEQ_STREAM_FOOTER:
+ lzma_bufcpy(coder->header, &coder->header_pos,
+ sizeof(coder->header),
+ out, out_pos, out_size);
+ return coder->header_pos < sizeof(coder->header)
+ ? LZMA_OK : LZMA_STREAM_END;
+ }
+
+ assert(0); // Reached only if coder->sequence matched none of the cases.
+ return LZMA_PROG_ERROR;
+}
+
+/// End the worker threads and free the coder with everything it owns.
+static void
+stream_encoder_mt_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_stream_coder *coder = coder_ptr;
+
+ // Threads must be killed before the output queue can be freed.
+ threads_end(coder, allocator);
+ lzma_outq_end(&coder->outq, allocator);
+
+ lzma_filters_free(coder->filters, allocator);
+ lzma_filters_free(coder->filters_cache, allocator);
+
+ lzma_next_end(&coder->index_encoder, allocator);
+ lzma_index_end(coder->index, allocator);
+
+ mythread_cond_destroy(&coder->cond);
+ mythread_mutex_destroy(&coder->mutex);
+
+ lzma_free(coder, allocator);
+ return;
+}
+
+/// Replace the filter chain; allowed only between Blocks (coder->thr == NULL).
+static lzma_ret
+stream_encoder_mt_update(void *coder_ptr, const lzma_allocator *allocator,
+ const lzma_filter *filters,
+ const lzma_filter *reversed_filters
+ lzma_attribute((__unused__)))
+{
+ lzma_stream_coder *coder = coder_ptr;
+
+ // Applications shouldn't attempt to change the options when
+ // we are already encoding the Index or Stream Footer.
+ if (coder->sequence > SEQ_BLOCK)
+ return LZMA_PROG_ERROR;
+
+ // For now the threaded encoder doesn't support changing
+ // the options in the middle of a Block.
+ if (coder->thr != NULL)
+ return LZMA_PROG_ERROR;
+
+ // Check if the filter chain seems mostly valid. See the comment
+ // in stream_encoder_mt_init().
+ if (lzma_raw_encoder_memusage(filters) == UINT64_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ // Make a copy to a temporary buffer first. This way the encoder
+ // state stays unchanged if an error occurs in lzma_filters_copy().
+ lzma_filter temp[LZMA_FILTERS_MAX + 1];
+ return_if_error(lzma_filters_copy(filters, temp, allocator));
+
+ // Free the options of the old chain as well as the cache.
+ lzma_filters_free(coder->filters, allocator);
+ lzma_filters_free(coder->filters_cache, allocator);
+
+ // Copy the new filter chain in place.
+ memcpy(coder->filters, temp, sizeof(temp)); // coder->filters now holds temp's option pointers.
+
+ return LZMA_OK;
+}
+
+
+/// Options handling for lzma_stream_encoder_mt_init() and
+/// lzma_stream_encoder_mt_memusage()
+static lzma_ret
+get_options(const lzma_mt *options, lzma_options_easy *opt_easy,
+ const lzma_filter **filters, uint64_t *block_size,
+ uint64_t *outbuf_size_max)
+{
+ // Validate some of the options.
+ if (options == NULL)
+ return LZMA_PROG_ERROR;
+
+ if (options->flags != 0 || options->threads == 0
+ || options->threads > LZMA_THREADS_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ if (options->filters != NULL) {
+ // Filter chain was given, use it as is.
+ *filters = options->filters;
+ } else {
+ // Use a preset.
+ if (lzma_easy_preset(opt_easy, options->preset)) // A true result is treated as a bad preset.
+ return LZMA_OPTIONS_ERROR;
+
+ *filters = opt_easy->filters; // Points into *opt_easy, which the caller provides.
+ }
+
+ // Block size
+ if (options->block_size > 0) {
+ if (options->block_size > BLOCK_SIZE_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ *block_size = options->block_size;
+ } else {
+ // Determine the Block size from the filter chain.
+ *block_size = lzma_mt_block_size(*filters);
+ if (*block_size == 0)
+ return LZMA_OPTIONS_ERROR;
+
+ assert(*block_size <= BLOCK_SIZE_MAX);
+ }
+
+ // Calculate the maximum amount of output that a single output buffer
+ // may need to hold. This is the same as the maximum total size of
+ // a Block.
+ *outbuf_size_max = lzma_block_buffer_bound64(*block_size);
+ if (*outbuf_size_max == 0)
+ return LZMA_MEM_ERROR;
+
+ return LZMA_OK;
+}
+
+/// Report progress: finished Blocks plus each worker's in-progress counters.
+static void
+get_progress(void *coder_ptr, uint64_t *progress_in, uint64_t *progress_out)
+{
+ lzma_stream_coder *coder = coder_ptr;
+
+ // Lock coder->mutex to prevent finishing threads from moving their
+ // progress info from the worker_thread structure to lzma_stream_coder.
+ mythread_sync(coder->mutex) {
+ *progress_in = coder->progress_in;
+ *progress_out = coder->progress_out;
+
+ for (size_t i = 0; i < coder->threads_initialized; ++i) {
+ mythread_sync(coder->threads[i].mutex) {
+ *progress_in += coder->threads[i].progress_in;
+ *progress_out += coder->threads[i]
+ .progress_out;
+ }
+ }
+ }
+
+ return;
+}
+
+/// Validate options and (re)initialize the coder; reuses threads if possible.
+static lzma_ret
+stream_encoder_mt_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_mt *options)
+{
+ lzma_next_coder_init(&stream_encoder_mt_init, next, allocator);
+
+ // Get the filter chain.
+ lzma_options_easy easy;
+ const lzma_filter *filters;
+ uint64_t block_size;
+ uint64_t outbuf_size_max;
+ return_if_error(get_options(options, &easy, &filters,
+ &block_size, &outbuf_size_max));
+
+#if SIZE_MAX < UINT64_MAX
+ if (block_size > SIZE_MAX || outbuf_size_max > SIZE_MAX)
+ return LZMA_MEM_ERROR;
+#endif
+
+ // Validate the filter chain so that we can give an error in this
+ // function instead of delaying it to the first call to lzma_code().
+ // The memory usage calculation verifies the filter chain as
+ // a side effect so we take advantage of that. It's not a perfect
+ // check though as raw encoder allows LZMA1 too but such problems
+ // will be caught eventually with Block Header encoder.
+ if (lzma_raw_encoder_memusage(filters) == UINT64_MAX)
+ return LZMA_OPTIONS_ERROR;
+
+ // Validate the Check ID.
+ if ((unsigned int)(options->check) > LZMA_CHECK_ID_MAX)
+ return LZMA_PROG_ERROR;
+
+ if (!lzma_check_is_supported(options->check))
+ return LZMA_UNSUPPORTED_CHECK;
+
+ // Allocate and initialize the base structure if needed.
+ lzma_stream_coder *coder = next->coder;
+ if (coder == NULL) {
+ coder = lzma_alloc(sizeof(lzma_stream_coder), allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+
+ // For the mutex and condition variable initializations
+ // the error handling has to be done here because
+ // stream_encoder_mt_end() doesn't know if they have
+ // already been initialized or not.
+ if (mythread_mutex_init(&coder->mutex)) {
+ lzma_free(coder, allocator);
+ next->coder = NULL;
+ return LZMA_MEM_ERROR;
+ }
+
+ if (mythread_cond_init(&coder->cond)) {
+ mythread_mutex_destroy(&coder->mutex);
+ lzma_free(coder, allocator);
+ next->coder = NULL;
+ return LZMA_MEM_ERROR;
+ }
+
+ next->code = &stream_encode_mt;
+ next->end = &stream_encoder_mt_end;
+ next->get_progress = &get_progress;
+ next->update = &stream_encoder_mt_update;
+
+ coder->filters[0].id = LZMA_VLI_UNKNOWN;
+ coder->filters_cache[0].id = LZMA_VLI_UNKNOWN;
+ coder->index_encoder = LZMA_NEXT_CODER_INIT;
+ coder->index = NULL;
+ memzero(&coder->outq, sizeof(coder->outq));
+ coder->threads = NULL;
+ coder->threads_max = 0;
+ coder->threads_initialized = 0;
+ }
+
+ // Basic initializations
+ coder->sequence = SEQ_STREAM_HEADER;
+ coder->block_size = (size_t)(block_size);
+ coder->outbuf_alloc_size = (size_t)(outbuf_size_max);
+ coder->thread_error = LZMA_OK;
+ coder->thr = NULL;
+
+ // Allocate the thread-specific base structures.
+ assert(options->threads > 0);
+ if (coder->threads_max != options->threads) {
+ threads_end(coder, allocator);
+
+ coder->threads = NULL;
+ coder->threads_max = 0;
+
+ coder->threads_initialized = 0;
+ coder->threads_free = NULL;
+
+ coder->threads = lzma_alloc(
+ options->threads * sizeof(worker_thread),
+ allocator);
+ if (coder->threads == NULL)
+ return LZMA_MEM_ERROR; // threads_max is still 0, so a later init allocates again.
+
+ coder->threads_max = options->threads;
+ } else {
+ // Reuse the old structures and threads. Tell the running
+ // threads to stop and wait until they have stopped.
+ threads_stop(coder, true);
+ }
+
+ // Output queue
+ return_if_error(lzma_outq_init(&coder->outq, allocator,
+ options->threads));
+
+ // Timeout
+ coder->timeout = options->timeout;
+
+ // Free the old filter chain and the cache.
+ lzma_filters_free(coder->filters, allocator);
+ lzma_filters_free(coder->filters_cache, allocator);
+
+ // Copy the new filter chain.
+ return_if_error(lzma_filters_copy(
+ filters, coder->filters, allocator));
+
+ // Index
+ lzma_index_end(coder->index, allocator);
+ coder->index = lzma_index_init(allocator);
+ if (coder->index == NULL)
+ return LZMA_MEM_ERROR;
+
+ // Stream Header
+ coder->stream_flags.version = 0;
+ coder->stream_flags.check = options->check;
+ return_if_error(lzma_stream_header_encode(
+ &coder->stream_flags, coder->header));
+
+ coder->header_pos = 0;
+
+ // Progress info
+ coder->progress_in = 0;
+ coder->progress_out = LZMA_STREAM_HEADER_SIZE; // The Stream Header is counted as output from the start.
+
+ return LZMA_OK;
+}
+
+
+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+// These are for compatibility with binaries linked against liblzma that
+// has been patched with xz-5.2.2-compat-libs.patch from RHEL/CentOS 7.
+// Actually that patch didn't create lzma_stream_encoder_mt@XZ_5.2.2
+// but it has been added here anyway since someone might misread the
+// RHEL patch and think both @XZ_5.1.2alpha and @XZ_5.2.2 exist.
+LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.1.2alpha",
+ lzma_ret, lzma_stream_encoder_mt_512a)(
+ lzma_stream *strm, const lzma_mt *options)
+ lzma_nothrow lzma_attr_warn_unused_result
+ __attribute__((__alias__("lzma_stream_encoder_mt_52")));
+
+LZMA_SYMVER_API("lzma_stream_encoder_mt@XZ_5.2.2",
+ lzma_ret, lzma_stream_encoder_mt_522)(
+ lzma_stream *strm, const lzma_mt *options)
+ lzma_nothrow lzma_attr_warn_unused_result
+ __attribute__((__alias__("lzma_stream_encoder_mt_52")));
+
+LZMA_SYMVER_API("lzma_stream_encoder_mt@@XZ_5.2",
+ lzma_ret, lzma_stream_encoder_mt_52)(
+ lzma_stream *strm, const lzma_mt *options)
+ lzma_nothrow lzma_attr_warn_unused_result;
+
+#define lzma_stream_encoder_mt lzma_stream_encoder_mt_52
+#endif
+extern LZMA_API(lzma_ret)
+lzma_stream_encoder_mt(lzma_stream *strm, const lzma_mt *options)
+{ // Initialize strm with stream_encoder_mt_init() and enable the supported actions.
+ lzma_next_strm_init(stream_encoder_mt_init, strm, options);
+
+ strm->internal->supported_actions[LZMA_RUN] = true;
+// strm->internal->supported_actions[LZMA_SYNC_FLUSH] = true;
+ strm->internal->supported_actions[LZMA_FULL_FLUSH] = true;
+ strm->internal->supported_actions[LZMA_FULL_BARRIER] = true;
+ strm->internal->supported_actions[LZMA_FINISH] = true;
+
+ return LZMA_OK;
+}
+
+
+#ifdef HAVE_SYMBOL_VERSIONS_LINUX
+LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.1.2alpha",
+ uint64_t, lzma_stream_encoder_mt_memusage_512a)(
+ const lzma_mt *options) lzma_nothrow lzma_attr_pure
+ __attribute__((__alias__("lzma_stream_encoder_mt_memusage_52")));
+
+LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@XZ_5.2.2",
+ uint64_t, lzma_stream_encoder_mt_memusage_522)(
+ const lzma_mt *options) lzma_nothrow lzma_attr_pure
+ __attribute__((__alias__("lzma_stream_encoder_mt_memusage_52")));
+
+LZMA_SYMVER_API("lzma_stream_encoder_mt_memusage@@XZ_5.2",
+ uint64_t, lzma_stream_encoder_mt_memusage_52)(
+ const lzma_mt *options) lzma_nothrow lzma_attr_pure;
+
+#define lzma_stream_encoder_mt_memusage lzma_stream_encoder_mt_memusage_52
+#endif
+// This function name is a monster but it's consistent with the older
+// monster names. :-( 31 chars is the max that C99 requires so in that
+// sense it's not too long. ;-)
+extern LZMA_API(uint64_t)
+lzma_stream_encoder_mt_memusage(const lzma_mt *options)
+{ // Returns the estimated total in bytes, or UINT64_MAX on bad options or overflow.
+ lzma_options_easy easy;
+ const lzma_filter *filters;
+ uint64_t block_size;
+ uint64_t outbuf_size_max;
+
+ if (get_options(options, &easy, &filters, &block_size,
+ &outbuf_size_max) != LZMA_OK)
+ return UINT64_MAX;
+
+ // Memory usage of the input buffers
+ const uint64_t inbuf_memusage = options->threads * block_size; // NOTE(review): unchecked; presumably safe given BLOCK_SIZE_MAX - confirm.
+
+ // Memory usage of the filter encoders
+ uint64_t filters_memusage = lzma_raw_encoder_memusage(filters);
+ if (filters_memusage == UINT64_MAX)
+ return UINT64_MAX;
+
+ filters_memusage *= options->threads; // NOTE(review): multiplication is not overflow-checked - confirm the bound.
+
+ // Memory usage of the output queue
+ const uint64_t outq_memusage = lzma_outq_memusage(
+ outbuf_size_max, options->threads);
+ if (outq_memusage == UINT64_MAX)
+ return UINT64_MAX;
+
+ // Sum them with overflow checking.
+ uint64_t total_memusage = LZMA_MEMUSAGE_BASE
+ + sizeof(lzma_stream_coder)
+ + options->threads * sizeof(worker_thread);
+
+ if (UINT64_MAX - total_memusage < inbuf_memusage)
+ return UINT64_MAX;
+
+ total_memusage += inbuf_memusage;
+
+ if (UINT64_MAX - total_memusage < filters_memusage)
+ return UINT64_MAX;
+
+ total_memusage += filters_memusage;
+
+ if (UINT64_MAX - total_memusage < outq_memusage)
+ return UINT64_MAX;
+
+ return total_memusage + outq_memusage;
+}
diff --git a/src/liblzma/common/stream_flags_common.c b/src/liblzma/common/stream_flags_common.c
new file mode 100644
index 0000000..fbe8eb8
--- /dev/null
+++ b/src/liblzma/common/stream_flags_common.c
@@ -0,0 +1,47 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_flags_common.c
+/// \brief Common stuff for Stream flags coders
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stream_flags_common.h"
+
+
+const uint8_t lzma_header_magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 }; // 0xFD, ASCII "7zXZ", 0x00
+const uint8_t lzma_footer_magic[2] = { 0x59, 0x5A }; // ASCII "YZ"
+
+/// Compare two Stream Flags structures; LZMA_OK means no mismatch was found.
+extern LZMA_API(lzma_ret)
+lzma_stream_flags_compare(
+ const lzma_stream_flags *a, const lzma_stream_flags *b)
+{
+ // We can compare only version 0 structures.
+ if (a->version != 0 || b->version != 0)
+ return LZMA_OPTIONS_ERROR;
+
+ // Check type
+ if ((unsigned int)(a->check) > LZMA_CHECK_ID_MAX
+ || (unsigned int)(b->check) > LZMA_CHECK_ID_MAX)
+ return LZMA_PROG_ERROR;
+
+ if (a->check != b->check)
+ return LZMA_DATA_ERROR;
+
+ // Backward Sizes are compared only if they are known in both.
+ if (a->backward_size != LZMA_VLI_UNKNOWN
+ && b->backward_size != LZMA_VLI_UNKNOWN) {
+ if (!is_backward_size_valid(a) || !is_backward_size_valid(b))
+ return LZMA_PROG_ERROR; // An out-of-range size is reported as a caller error.
+
+ if (a->backward_size != b->backward_size)
+ return LZMA_DATA_ERROR;
+ }
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/common/stream_flags_common.h b/src/liblzma/common/stream_flags_common.h
new file mode 100644
index 0000000..84e96ba
--- /dev/null
+++ b/src/liblzma/common/stream_flags_common.h
@@ -0,0 +1,36 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_flags_common.h
+/// \brief Common stuff for Stream flags coders
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_STREAM_FLAGS_COMMON_H
+#define LZMA_STREAM_FLAGS_COMMON_H
+
+#include "common.h"
+
+/// Size of the Stream Flags field
+#define LZMA_STREAM_FLAGS_SIZE 2
+/// Stream Header magic bytes; defined in stream_flags_common.c.
+lzma_attr_visibility_hidden
+extern const uint8_t lzma_header_magic[6];
+/// Stream Footer magic bytes; defined in stream_flags_common.c.
+lzma_attr_visibility_hidden
+extern const uint8_t lzma_footer_magic[2];
+
+/// Return true if options->backward_size is in range and a multiple of four.
+static inline bool
+is_backward_size_valid(const lzma_stream_flags *options)
+{
+ return options->backward_size >= LZMA_BACKWARD_SIZE_MIN
+ && options->backward_size <= LZMA_BACKWARD_SIZE_MAX
+ && (options->backward_size & 3) == 0; // The two lowest bits must be zero.
+}
+
+#endif
diff --git a/src/liblzma/common/stream_flags_decoder.c b/src/liblzma/common/stream_flags_decoder.c
new file mode 100644
index 0000000..b8d263b
--- /dev/null
+++ b/src/liblzma/common/stream_flags_decoder.c
@@ -0,0 +1,88 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_flags_decoder.c
+/// \brief Decodes Stream Header and Stream Footer from .xz files
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stream_flags_common.h"
+
+
+/// Decodes the two-byte Stream Flags field at `in' into options->version
+/// and options->check.
+///
+/// \return     false on success, true if a reserved bit is set.
+static bool
+stream_flags_decode(lzma_stream_flags *options, const uint8_t *in)
+{
+	// The whole first byte and the high nibble of the second byte
+	// are reserved and have to be zero.
+	const bool reserved_bits_set = in[0] != 0x00 || (in[1] & 0xF0) != 0;
+	if (reserved_bits_set)
+		return true;
+
+	// The low nibble of the second byte is the Check ID.
+	options->check = in[1] & 0x0F;
+	options->version = 0;
+
+	return false;
+}
+
+
+/// Decodes a Stream Header from `in' into *options.
+///
+/// \return     LZMA_FORMAT_ERROR if the magic bytes don't match,
+///             LZMA_DATA_ERROR if the CRC32 doesn't match,
+///             LZMA_OPTIONS_ERROR if reserved Stream Flags bits are set,
+///             LZMA_OK otherwise.
+extern LZMA_API(lzma_ret)
+lzma_stream_header_decode(lzma_stream_flags *options, const uint8_t *in)
+{
+	// The magic bytes are followed by the Stream Flags and then by
+	// the CRC32 of the Stream Flags.
+	const uint8_t *const flags = in + sizeof(lzma_header_magic);
+	const uint8_t *const crc_field = flags + LZMA_STREAM_FLAGS_SIZE;
+
+	if (memcmp(in, lzma_header_magic, sizeof(lzma_header_magic)) != 0)
+		return LZMA_FORMAT_ERROR;
+
+	// The CRC32 is verified before the flags are interpreted so that
+	// corrupt files can be told apart from unsupported ones.
+	const uint32_t stored_crc = read32le(crc_field);
+	const uint32_t computed_crc = lzma_crc32(
+			flags, LZMA_STREAM_FLAGS_SIZE, 0);
+	if (computed_crc != stored_crc) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+		return LZMA_DATA_ERROR;
+#endif
+	}
+
+	if (stream_flags_decode(options, flags))
+		return LZMA_OPTIONS_ERROR;
+
+	// A Stream Header carries no Backward Size. Marking it as unknown
+	// lets lzma_stream_flags_compare() compare a Stream Header against
+	// a Stream Footer while still being usable for two Stream Footers.
+	options->backward_size = LZMA_VLI_UNKNOWN;
+
+	return LZMA_OK;
+}
+
+
+/// Decodes a Stream Footer from `in' into *options.
+///
+/// \return     LZMA_FORMAT_ERROR if the magic bytes don't match,
+///             LZMA_DATA_ERROR if the CRC32 doesn't match,
+///             LZMA_OPTIONS_ERROR if reserved Stream Flags bits are set,
+///             LZMA_OK otherwise.
+extern LZMA_API(lzma_ret)
+lzma_stream_footer_decode(lzma_stream_flags *options, const uint8_t *in)
+{
+	// Field order: CRC32, Backward Size, Stream Flags, magic bytes.
+	const uint8_t *const size_field = in + sizeof(uint32_t);
+	const uint8_t *const flags = in + sizeof(uint32_t) * 2;
+	const uint8_t *const magic = flags + LZMA_STREAM_FLAGS_SIZE;
+
+	if (memcmp(magic, lzma_footer_magic, sizeof(lzma_footer_magic)) != 0)
+		return LZMA_FORMAT_ERROR;
+
+	// The CRC32 covers Backward Size and Stream Flags.
+	const uint32_t stored_crc = read32le(in);
+	const uint32_t computed_crc = lzma_crc32(size_field,
+			sizeof(uint32_t) + LZMA_STREAM_FLAGS_SIZE, 0);
+	if (computed_crc != stored_crc) {
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+		return LZMA_DATA_ERROR;
+#endif
+	}
+
+	if (stream_flags_decode(options, flags))
+		return LZMA_OPTIONS_ERROR;
+
+	// The stored value is (real size / 4) - 1. The arithmetic is done
+	// in the type of backward_size, not in uint32_t.
+	options->backward_size = read32le(size_field);
+	options->backward_size += 1;
+	options->backward_size *= 4;
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/stream_flags_encoder.c b/src/liblzma/common/stream_flags_encoder.c
new file mode 100644
index 0000000..b98ab17
--- /dev/null
+++ b/src/liblzma/common/stream_flags_encoder.c
@@ -0,0 +1,86 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file stream_flags_encoder.c
+/// \brief Encodes Stream Header and Stream Footer for .xz files
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "stream_flags_common.h"
+
+
+/// Encodes the two-byte Stream Flags field from options->check into `out'.
+///
+/// \return     false on success, true if options->check is greater
+///             than LZMA_CHECK_ID_MAX.
+static bool
+stream_flags_encode(const lzma_stream_flags *options, uint8_t *out)
+{
+	const unsigned int check_id = (unsigned int)(options->check);
+	if (check_id > LZMA_CHECK_ID_MAX)
+		return true;
+
+	// First byte is always zero; the second byte holds the Check ID.
+	out[1] = options->check;
+	out[0] = 0x00;
+
+	return false;
+}
+
+
+/// Encodes a Stream Header from *options into `out'.
+///
+/// \return     LZMA_OPTIONS_ERROR if options->version isn't zero,
+///             LZMA_PROG_ERROR if the Stream Flags cannot be encoded,
+///             LZMA_OK otherwise.
+extern LZMA_API(lzma_ret)
+lzma_stream_header_encode(const lzma_stream_flags *options, uint8_t *out)
+{
+	assert(sizeof(lzma_header_magic) + LZMA_STREAM_FLAGS_SIZE
+			+ 4 == LZMA_STREAM_HEADER_SIZE);
+
+	if (options->version != 0)
+		return LZMA_OPTIONS_ERROR;
+
+	// The magic bytes are followed by the Stream Flags and then by
+	// the CRC32 of the Stream Flags.
+	uint8_t *const flags = out + sizeof(lzma_header_magic);
+	uint8_t *const crc_field = flags + LZMA_STREAM_FLAGS_SIZE;
+
+	memcpy(out, lzma_header_magic, sizeof(lzma_header_magic));
+
+	if (stream_flags_encode(options, flags))
+		return LZMA_PROG_ERROR;
+
+	write32le(crc_field, lzma_crc32(flags, LZMA_STREAM_FLAGS_SIZE, 0));
+
+	return LZMA_OK;
+}
+
+
+/// Encodes a Stream Footer from *options into `out'.
+///
+/// \return     LZMA_OPTIONS_ERROR if options->version isn't zero,
+///             LZMA_PROG_ERROR if Backward Size is invalid or the
+///             Stream Flags cannot be encoded, LZMA_OK otherwise.
+extern LZMA_API(lzma_ret)
+lzma_stream_footer_encode(const lzma_stream_flags *options, uint8_t *out)
+{
+	assert(2 * 4 + LZMA_STREAM_FLAGS_SIZE + sizeof(lzma_footer_magic)
+			== LZMA_STREAM_HEADER_SIZE);
+
+	if (options->version != 0)
+		return LZMA_OPTIONS_ERROR;
+
+	if (!is_backward_size_valid(options))
+		return LZMA_PROG_ERROR;
+
+	// Field order: CRC32, Backward Size, Stream Flags, magic bytes.
+	uint8_t *const size_field = out + 4;
+	uint8_t *const flags = out + 2 * 4;
+	uint8_t *const magic = flags + LZMA_STREAM_FLAGS_SIZE;
+
+	// Backward Size is stored as (real size / 4) - 1.
+	write32le(size_field, options->backward_size / 4 - 1);
+
+	if (stream_flags_encode(options, flags))
+		return LZMA_PROG_ERROR;
+
+	// The CRC32 covers Backward Size and Stream Flags.
+	write32le(out, lzma_crc32(size_field, 4 + LZMA_STREAM_FLAGS_SIZE, 0));
+
+	memcpy(magic, lzma_footer_magic, sizeof(lzma_footer_magic));
+
+	return LZMA_OK;
+}
diff --git a/src/liblzma/common/string_conversion.c b/src/liblzma/common/string_conversion.c
new file mode 100644
index 0000000..d2c1e80
--- /dev/null
+++ b/src/liblzma/common/string_conversion.c
@@ -0,0 +1,1324 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file string_conversion.c
+/// \brief Conversion of strings to filter chain and vice versa
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "filter_common.h"
+
+
+/////////////////////
+// String building //
+/////////////////////
+
+/// How much memory to allocate for strings. For now, no realloc is used
+/// so this needs to be big enough even though there of course is
+/// an overflow check still.
+///
+/// FIXME? Using a fixed size is wasteful if the application doesn't free
+/// the string fairly quickly but this can be improved later if needed.
+#define STR_ALLOC_SIZE 800
+
+
+/// String builder with a fixed-size buffer of STR_ALLOC_SIZE bytes.
+typedef struct {
+ /// Buffer allocated in str_init()
+ char *buf;
+ /// Number of bytes appended to buf so far
+ size_t pos;
+} lzma_str;
+
+
+/// Allocates the STR_ALLOC_SIZE-byte buffer of *str and resets the
+/// write position.
+///
+/// \return     LZMA_OK on success, LZMA_MEM_ERROR if allocation failed.
+static lzma_ret
+str_init(lzma_str *str, const lzma_allocator *allocator)
+{
+	char *const mem = lzma_alloc(STR_ALLOC_SIZE, allocator);
+	str->buf = mem;
+
+	if (mem == NULL)
+		return LZMA_MEM_ERROR;
+
+	str->pos = 0;
+	return LZMA_OK;
+}
+
+
+/// Frees the buffer owned by *str.
+static void
+str_free(lzma_str *str, const lzma_allocator *allocator)
+{
+	lzma_free(str->buf, allocator);
+}
+
+
+/// Returns true when only the byte reserved for the terminating '\0'
+/// is left in the buffer.
+static bool
+str_is_full(const lzma_str *str)
+{
+	return str->pos + 1 == STR_ALLOC_SIZE;
+}
+
+
+/// Terminates the string and hands the buffer over to *dest.
+///
+/// If the buffer is full, the buffer is freed, *dest is set to NULL,
+/// and LZMA_PROG_ERROR is returned.
+static lzma_ret
+str_finish(char **dest, lzma_str *str, const lzma_allocator *allocator)
+{
+	if (!str_is_full(str)) {
+		str->buf[str->pos] = '\0';
+		*dest = str->buf;
+		return LZMA_OK;
+	}
+
+	// The preallocated buffer was too small. STR_ALLOC_SIZE is
+	// supposed to be adjusted when new filters are added, so this
+	// shouldn't happen.
+	lzma_free(str->buf, allocator);
+	*dest = NULL;
+	assert(0);
+	return LZMA_PROG_ERROR;
+}
+
+
+/// Appends the '\0'-terminated string s to *str. If it doesn't fit, only
+/// as many bytes as fit are copied; one byte is always left unused for
+/// the terminator written by str_finish().
+static void
+str_append_str(lzma_str *str, const char *s)
+{
+	const size_t room = STR_ALLOC_SIZE - 1 - str->pos;
+
+	size_t n = strlen(s);
+	if (n > room)
+		n = room;
+
+	memcpy(str->buf + str->pos, s, n);
+	str->pos += n;
+}
+
+
+/// Appends v as a decimal integer to *str. If use_byte_suffix is true
+/// and v is an exact multiple of 1024, 1024^2, or 1024^3, the value is
+/// scaled down and followed by KiB, MiB, or GiB respectively.
+static void
+str_append_u32(lzma_str *str, uint32_t v, bool use_byte_suffix)
+{
+	if (v == 0) {
+		str_append_str(str, "0");
+		return;
+	}
+
+	// NOTE: Plain "B" isn't used as a suffix because xz and the parser
+	// in this file don't support it, and at a glance it may look like
+	// the digit 8 (there cannot be a space before the suffix).
+	static const char suffixes[4][4] = { "", "KiB", "MiB", "GiB" };
+
+	size_t suf = 0;
+	if (use_byte_suffix) {
+		for (; suf < ARRAY_SIZE(suffixes) - 1 && (v & 1023) == 0;
+				++suf)
+			v >>= 10;
+	}
+
+	// UINT32_MAX needs 10 digits + '\0' in base 10. Initializing
+	// with "" zeroes the whole array, so the last byte already is
+	// the terminator. Digits are produced from the least significant
+	// one, filling the array backwards.
+	char digits[16] = "";
+	char *first = digits + sizeof(digits) - 1;
+
+	while (v != 0) {
+		*--first = '0' + (v % 10);
+		v /= 10;
+	}
+
+	str_append_str(str, first);
+	str_append_str(str, suffixes[suf]);
+}
+
+
+//////////////////////////////////////////////
+// Parsing and stringification declarations //
+//////////////////////////////////////////////
+
+/// Maximum length for filter and option names.
+/// 11 chars + terminating '\0' + sizeof(uint32_t) = 16 bytes
+#define NAME_LEN_MAX 11
+
+
+/// For option_map.flags: Use .u.map to convert the input value
+/// to an integer. Without this flag, .u.range.{min,max} are used
+/// as the allowed range for the integer.
+#define OPTMAP_USE_NAME_VALUE_MAP 0x01
+
+/// For option_map.flags: Allow KiB/MiB/GiB in input string and use them in
+/// the stringified output if the value is an exact multiple of these.
+/// This is used e.g. for LZMA1/2 dictionary size.
+#define OPTMAP_USE_BYTE_SUFFIX 0x02
+
+/// For option_map.flags: If the integer value is zero then this option
+/// won't be included in the stringified output. It's used e.g. for
+/// BCJ filter start offset which usually is zero.
+#define OPTMAP_NO_STRFY_ZERO 0x04
+
+/// Possible values for option_map.type. Since OPTMAP_TYPE_UINT32 is 0,
+/// it doesn't need to be specified in the initializers as it is
+/// the implicit value.
+enum {
+ OPTMAP_TYPE_UINT32, // Value is stored as uint32_t
+ OPTMAP_TYPE_LZMA_MODE, // Value is stored as lzma_mode
+ OPTMAP_TYPE_LZMA_MATCH_FINDER, // Value is stored as lzma_match_finder
+ OPTMAP_TYPE_LZMA_PRESET, // Value is parsed by set_lzma12_preset()
+};
+
+
+/// This is for mapping string values in options to integers.
+/// The last element of an array must have "" as the name.
+/// It's used e.g. for match finder names in LZMA1/2.
+typedef struct {
+ const char name[NAME_LEN_MAX + 1]; // String form; "" terminates the array
+ const uint32_t value; // Integer that the name maps to
+} name_value_map;
+
+
+/// Each filter that has options needs an array of option_map structures.
+/// The array doesn't need to be terminated as the functions take the
+/// length of the array as an argument.
+///
+/// When converting a string to filter options structure, option values
+/// will be handled in a few different ways:
+///
+/// (1) If .type equals OPTMAP_TYPE_LZMA_PRESET then LZMA1/2 preset string
+/// is handled specially.
+///
+/// (2) If .flags has OPTMAP_USE_NAME_VALUE_MAP set then the string is
+/// converted to an integer using the name_value_map pointed by .u.map.
+/// The last element in .u.map must have .name = "" as the terminator.
+///
+/// (3) Otherwise the string is treated as a non-negative unsigned decimal
+/// integer which must be in the range set in .u.range. If .flags has
+/// OPTMAP_USE_BYTE_SUFFIX then KiB, MiB, and GiB suffixes are allowed.
+///
+/// The integer value from (2) or (3) is then stored to filter_options
+/// at the offset specified in .offset using the type specified in .type
+/// (default is uint32_t).
+///
+/// Stringifying a filter is done by processing a given number of options
+/// in order from the beginning of an option_map array. The integer is
+/// read from filter_options at .offset using the type from .type.
+///
+/// If the integer is zero and .flags has OPTMAP_NO_STRFY_ZERO then the
+/// option is skipped.
+///
+/// If .flags has OPTMAP_USE_NAME_VALUE_MAP set then .u.map will be used
+/// to convert the option to a string. If the map doesn't contain a string
+/// for the integer value then "UNKNOWN" is used.
+///
+/// If .flags doesn't have OPTMAP_USE_NAME_VALUE_MAP set then the integer is
+/// converted to a decimal value. If OPTMAP_USE_BYTE_SUFFIX is used then KiB,
+/// MiB, or GiB suffix is used if the value is an exact multiple of these.
+/// Plain "B" suffix is never used.
+typedef struct {
+ // Option name as it appears on the left side of '=' in the string
+ char name[NAME_LEN_MAX + 1];
+ // One of the OPTMAP_TYPE_* values
+ uint8_t type;
+ // Bitwise-or of the OPTMAP_* flags (zero if none)
+ uint8_t flags;
+ // Byte offset of the member in the filter-specific options structure
+ uint16_t offset;
+
+ // Which member is valid depends on OPTMAP_USE_NAME_VALUE_MAP in .flags.
+ union {
+ // Allowed range (inclusive) when the name-value map isn't used
+ struct {
+ uint32_t min;
+ uint32_t max;
+ } range;
+
+ // String-to-integer map terminated by an entry with an empty name
+ const name_value_map *map;
+ } u;
+} option_map;
+
+
+// Forward declaration: the filter-specific parse_* functions below call
+// this before its definition appears later in the file.
+static const char *parse_options(const char **const str, const char *str_end,
+ void *filter_options,
+ const option_map *const optmap, const size_t optmap_size);
+
+
+/////////
+// BCJ //
+/////////
+
+#if defined(HAVE_ENCODER_X86) \
+ || defined(HAVE_DECODER_X86) \
+ || defined(HAVE_ENCODER_ARM) \
+ || defined(HAVE_DECODER_ARM) \
+ || defined(HAVE_ENCODER_ARMTHUMB) \
+ || defined(HAVE_DECODER_ARMTHUMB) \
+ || defined(HAVE_ENCODER_ARM64) \
+ || defined(HAVE_DECODER_ARM64) \
+ || defined(HAVE_ENCODER_POWERPC) \
+ || defined(HAVE_DECODER_POWERPC) \
+ || defined(HAVE_ENCODER_IA64) \
+ || defined(HAVE_DECODER_IA64) \
+ || defined(HAVE_ENCODER_SPARC) \
+ || defined(HAVE_DECODER_SPARC)
+/// Options of the BCJ filters: only the start offset, which accepts
+/// KiB/MiB/GiB suffixes and is omitted from stringified output when zero.
+static const option_map bcj_optmap[] = {
+ {
+ .name = "start",
+ .flags = OPTMAP_NO_STRFY_ZERO | OPTMAP_USE_BYTE_SUFFIX,
+ .offset = offsetof(lzma_options_bcj, start_offset),
+ .u.range.min = 0,
+ .u.range.max = UINT32_MAX,
+ }
+};
+
+
+/// Parses the options of a BCJ filter into filter_options
+/// (lzma_options_bcj). Returns NULL on success or an error message.
+static const char *
+parse_bcj(const char **const str, const char *str_end, void *filter_options)
+{
+	// No defaults need to be set here: filter_options was zeroed
+	// when it was allocated and zero is the default start offset.
+	const size_t optmap_size = ARRAY_SIZE(bcj_optmap);
+
+	return parse_options(str, str_end, filter_options,
+			bcj_optmap, optmap_size);
+}
+#endif
+
+
+///////////
+// Delta //
+///////////
+
+#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
+/// Options of the Delta filter: only the distance, limited to
+/// LZMA_DELTA_DIST_MIN..LZMA_DELTA_DIST_MAX.
+static const option_map delta_optmap[] = {
+ {
+ .name = "dist",
+ .offset = offsetof(lzma_options_delta, dist),
+ .u.range.min = LZMA_DELTA_DIST_MIN,
+ .u.range.max = LZMA_DELTA_DIST_MAX,
+ }
+};
+
+
+/// Parses the options of the Delta filter into filter_options
+/// (lzma_options_delta). Returns NULL on success or an error message.
+static const char *
+parse_delta(const char **const str, const char *str_end, void *filter_options)
+{
+	// Set the defaults before parsing so that options missing from
+	// the string keep these values.
+	lzma_options_delta *const delta = filter_options;
+	delta->dist = LZMA_DELTA_DIST_MIN;
+	delta->type = LZMA_DELTA_TYPE_BYTE;
+
+	return parse_options(str, str_end, filter_options,
+			delta_optmap, ARRAY_SIZE(delta_optmap));
+}
+#endif
+
+
+///////////////////
+// LZMA1 & LZMA2 //
+///////////////////
+
+/// Help string for presets
+#define LZMA12_PRESET_STR "0-9[e]"
+
+
+/// Parses a LZMA1/2 preset of the form described by LZMA12_PRESET_STR:
+/// one decimal digit (the preset level) followed by zero or more flag
+/// characters. Currently the only flag is 'e' (LZMA_PRESET_EXTREME).
+///
+/// \param      str      Pointer to the current position in the input.
+///                      It must point to at least one char (*str < str_end).
+///                      It is advanced past every char that was accepted,
+///                      so on error it points to the offending char.
+/// \param      str_end  First char that isn't part of the preset string
+/// \param      preset   Destination for the preset level combined with
+///                      the flags. It is set only if the level digit
+///                      was valid.
+///
+/// \return     NULL on success or a pointer to a static error message.
+static const char *
+parse_lzma12_preset(const char **const str, const char *str_end,
+		uint32_t *preset)
+{
+	assert(*str < str_end);
+
+	// The preset level must be a decimal digit. Without this check
+	// a value like "xe" would be turned into a meaningless number
+	// and the flag loop below would run, leaving it to
+	// lzma_lzma_preset() to reject the result with *str no longer
+	// pointing at the char that was actually wrong.
+	if (**str < '0' || **str > '9')
+		return "Unsupported preset";
+
+	*preset = (uint32_t)(**str - '0');
+
+	// NOTE: Remember to update LZMA12_PRESET_STR if this is modified!
+	while (++*str < str_end) {
+		switch (**str) {
+		case 'e':
+			*preset |= LZMA_PRESET_EXTREME;
+			break;
+
+		default:
+			return "Unsupported preset flag";
+		}
+	}
+
+	return NULL;
+}
+
+
+/// Parses a preset string and, if it is valid, initializes
+/// filter_options (lzma_options_lzma) from that preset.
+/// Returns NULL on success or an error message.
+static const char *
+set_lzma12_preset(const char **const str, const char *str_end,
+		void *filter_options)
+{
+	uint32_t preset;
+	const char *errmsg = parse_lzma12_preset(str, str_end, &preset);
+
+	if (errmsg == NULL) {
+		lzma_options_lzma *const lzma = filter_options;
+		if (lzma_lzma_preset(lzma, preset))
+			errmsg = "Unsupported preset";
+	}
+
+	return errmsg;
+}
+
+
+/// Names accepted for the LZMA1/2 "mode" option.
+/// The entry with an empty name terminates the array.
+static const name_value_map lzma12_mode_map[] = {
+ { "fast", LZMA_MODE_FAST },
+ { "normal", LZMA_MODE_NORMAL },
+ { "", 0 }
+};
+
+
+/// Names accepted for the LZMA1/2 "mf" (match finder) option.
+/// The entry with an empty name terminates the array.
+static const name_value_map lzma12_mf_map[] = {
+ { "hc3", LZMA_MF_HC3 },
+ { "hc4", LZMA_MF_HC4 },
+ { "bt2", LZMA_MF_BT2 },
+ { "bt3", LZMA_MF_BT3 },
+ { "bt4", LZMA_MF_BT4 },
+ { "", 0 }
+};
+
+
+/// Options of LZMA1 and LZMA2.
+///
+/// The order of the elements matters: filter_name_map stringifies only
+/// the first strfy_encoder or strfy_decoder elements of this array.
+static const option_map lzma12_optmap[] = {
+ {
+ // Handled by set_lzma12_preset(); never stringified.
+ .name = "preset",
+ .type = OPTMAP_TYPE_LZMA_PRESET,
+ }, {
+ .name = "dict",
+ .flags = OPTMAP_USE_BYTE_SUFFIX,
+ .offset = offsetof(lzma_options_lzma, dict_size),
+ .u.range.min = LZMA_DICT_SIZE_MIN,
+ // FIXME? The max is really max for encoding but decoding
+ // would allow 4 GiB - 1 B.
+ .u.range.max = (UINT32_C(1) << 30) + (UINT32_C(1) << 29),
+ }, {
+ .name = "lc",
+ .offset = offsetof(lzma_options_lzma, lc),
+ .u.range.min = LZMA_LCLP_MIN,
+ .u.range.max = LZMA_LCLP_MAX,
+ }, {
+ .name = "lp",
+ .offset = offsetof(lzma_options_lzma, lp),
+ .u.range.min = LZMA_LCLP_MIN,
+ .u.range.max = LZMA_LCLP_MAX,
+ }, {
+ .name = "pb",
+ .offset = offsetof(lzma_options_lzma, pb),
+ .u.range.min = LZMA_PB_MIN,
+ .u.range.max = LZMA_PB_MAX,
+ }, {
+ .name = "mode",
+ .type = OPTMAP_TYPE_LZMA_MODE,
+ .flags = OPTMAP_USE_NAME_VALUE_MAP,
+ .offset = offsetof(lzma_options_lzma, mode),
+ .u.map = lzma12_mode_map,
+ }, {
+ .name = "nice",
+ .offset = offsetof(lzma_options_lzma, nice_len),
+ .u.range.min = 2,
+ .u.range.max = 273,
+ }, {
+ .name = "mf",
+ .type = OPTMAP_TYPE_LZMA_MATCH_FINDER,
+ .flags = OPTMAP_USE_NAME_VALUE_MAP,
+ .offset = offsetof(lzma_options_lzma, mf),
+ .u.map = lzma12_mf_map,
+ }, {
+ .name = "depth",
+ .offset = offsetof(lzma_options_lzma, depth),
+ .u.range.min = 0,
+ .u.range.max = UINT32_MAX,
+ }
+};
+
+
+/// Parses the options of LZMA1 or LZMA2 into filter_options
+/// (lzma_options_lzma). The options start from LZMA_PRESET_DEFAULT and
+/// the string overrides individual values.
+/// Returns NULL on success or an error message.
+static const char *
+parse_lzma12(const char **const str, const char *str_end, void *filter_options)
+{
+	lzma_options_lzma *const lzma = filter_options;
+
+	// Loading the default preset cannot fail.
+	const bool preset_failed = lzma_lzma_preset(lzma, LZMA_PRESET_DEFAULT);
+	assert(!preset_failed);
+	(void)preset_failed;
+
+	const char *errmsg = parse_options(str, str_end, filter_options,
+			lzma12_optmap, ARRAY_SIZE(lzma12_optmap));
+
+	// lc and lp are range-checked individually by parse_options()
+	// but their sum has a limit too.
+	if (errmsg == NULL && lzma->lc + lzma->lp > LZMA_LCLP_MAX)
+		errmsg = "The sum of lc and lp must not exceed 4";
+
+	return errmsg;
+}
+
+
+/////////////////////////////////////////
+// Generic parsing and stringification //
+/////////////////////////////////////////
+
+/// Table of all filters known to the string conversion functions.
+/// Each entry is compiled in only if the encoder or the decoder of
+/// the filter has been enabled at build time.
+static const struct {
+ /// Name of the filter
+ char name[NAME_LEN_MAX + 1];
+
+ /// For lzma_str_to_filters:
+ /// Size of the filter-specific options structure.
+ uint32_t opts_size;
+
+ /// Filter ID
+ lzma_vli id;
+
+ /// For lzma_str_to_filters:
+ /// Function to parse the filter-specific options. The filter_options
+ /// will already have been allocated using lzma_alloc_zero().
+ const char *(*parse)(const char **str, const char *str_end,
+ void *filter_options);
+
+ /// For lzma_str_from_filters:
+ /// If the flag LZMA_STR_ENCODER is used then the first
+ /// strfy_encoder elements of optmap are stringified.
+ /// With LZMA_STR_DECODER strfy_decoder is used.
+ /// Currently encoders use all options that decoders do but if
+ /// that changes then this needs to be changed too, for example,
+ /// add a new OPTMAP flag to skip printing some decoder-only options.
+ const option_map *optmap;
+ uint8_t strfy_encoder;
+ uint8_t strfy_decoder;
+
+ /// For lzma_str_from_filters:
+ /// If true, lzma_filter.options is allowed to be NULL. In that case,
+ /// only the filter name is printed without any options.
+ bool allow_null;
+
+} filter_name_map[] = {
+ // Initializer order: name, opts_size, id, parse, optmap,
+ // strfy_encoder, strfy_decoder, allow_null.
+#if defined (HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1)
+ // Encoder: all 9 elements of lzma12_optmap.
+ // Decoder: the first 5 (preset, dict, lc, lp, pb).
+ { "lzma1", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA1,
+ &parse_lzma12, lzma12_optmap, 9, 5, false },
+#endif
+
+#if defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2)
+ // Encoder: all 9 elements of lzma12_optmap.
+ // Decoder: the first 2 (preset, dict).
+ { "lzma2", sizeof(lzma_options_lzma), LZMA_FILTER_LZMA2,
+ &parse_lzma12, lzma12_optmap, 9, 2, false },
+#endif
+
+#if defined(HAVE_ENCODER_X86) || defined(HAVE_DECODER_X86)
+ { "x86", sizeof(lzma_options_bcj), LZMA_FILTER_X86,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_ARM) || defined(HAVE_DECODER_ARM)
+ { "arm", sizeof(lzma_options_bcj), LZMA_FILTER_ARM,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_ARMTHUMB) || defined(HAVE_DECODER_ARMTHUMB)
+ { "armthumb", sizeof(lzma_options_bcj), LZMA_FILTER_ARMTHUMB,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_ARM64) || defined(HAVE_DECODER_ARM64)
+ { "arm64", sizeof(lzma_options_bcj), LZMA_FILTER_ARM64,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_POWERPC) || defined(HAVE_DECODER_POWERPC)
+ { "powerpc", sizeof(lzma_options_bcj), LZMA_FILTER_POWERPC,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_IA64) || defined(HAVE_DECODER_IA64)
+ { "ia64", sizeof(lzma_options_bcj), LZMA_FILTER_IA64,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_SPARC) || defined(HAVE_DECODER_SPARC)
+ { "sparc", sizeof(lzma_options_bcj), LZMA_FILTER_SPARC,
+ &parse_bcj, bcj_optmap, 1, 1, true },
+#endif
+
+#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA)
+ { "delta", sizeof(lzma_options_delta), LZMA_FILTER_DELTA,
+ &parse_delta, delta_optmap, 1, 1, false },
+#endif
+};
+
+
+/// Decodes options from a string for one filter (name1=value1,name2=value2).
+/// Caller must have allocated memory for filter_options already and set
+/// the initial default values. This is called from the filter-specific
+/// parse_* functions.
+///
+/// The input string starts at *str and the address in str_end is the first
+/// char that is not part of the string anymore. So no '\0' terminator is
+/// used. *str is advanced every time something has been decoded successfully.
+///
+/// optmap is an array of optmap_size elements that describes the options
+/// the filter supports.
+///
+/// Returns NULL on success. On error, returns a pointer to a string literal
+/// that describes the problem; *str then points to the position in the
+/// input that the message is about.
+static const char *
+parse_options(const char **const str, const char *str_end,
+ void *filter_options,
+ const option_map *const optmap, const size_t optmap_size)
+{
+ while (*str < str_end && **str != '\0') {
+ // Each option is of the form name=value.
+ // Commas (',') separate options. Extra commas are ignored.
+ // Ignoring extra commas makes it simpler if an optional
+ // option is stored in a shell variable which can be empty.
+ if (**str == ',') {
+ ++*str;
+ continue;
+ }
+
+ // Find where the next name=value ends.
+ const size_t str_len = (size_t)(str_end - *str);
+ const char *name_eq_value_end = memchr(*str, ',', str_len);
+ if (name_eq_value_end == NULL)
+ name_eq_value_end = str_end;
+
+ // Look for '=' only within the current name=value.
+ const char *equals_sign = memchr(*str, '=',
+ (size_t)(name_eq_value_end - *str));
+
+ // Fail if the '=' wasn't found or the option name is missing
+ // (the first char is '=').
+ if (equals_sign == NULL || **str == '=')
+ return "Options must be 'name=value' pairs separated "
+ "with commas";
+
+ // Reject a too long option name so that the memcmp()
+ // in the loop below won't read past the end of the
+ // string in optmap[i].name.
+ const size_t name_len = (size_t)(equals_sign - *str);
+ if (name_len > NAME_LEN_MAX)
+ return "Unknown option name";
+
+ // Find the option name from optmap[].
+ size_t i = 0;
+ while (true) {
+ if (i == optmap_size)
+ return "Unknown option name";
+
+ // The name in the input isn't '\0'-terminated, so
+ // compare name_len bytes and then check that
+ // optmap[i].name ends at the same length.
+ if (memcmp(*str, optmap[i].name, name_len) == 0
+ && optmap[i].name[name_len] == '\0')
+ break;
+
+ ++i;
+ }
+
+ // The input string is good at least until the start of
+ // the option value.
+ *str = equals_sign + 1;
+
+ // The code assumes that the option value isn't an empty
+ // string so check it here.
+ const size_t value_len = (size_t)(name_eq_value_end - *str);
+ if (value_len == 0)
+ return "Option value cannot be empty";
+
+ // LZMA1/2 preset has its own parsing function.
+ if (optmap[i].type == OPTMAP_TYPE_LZMA_PRESET) {
+ const char *errmsg = set_lzma12_preset(str,
+ name_eq_value_end, filter_options);
+ if (errmsg != NULL)
+ return errmsg;
+
+ continue;
+ }
+
+ // It's an integer value.
+ uint32_t v;
+ if (optmap[i].flags & OPTMAP_USE_NAME_VALUE_MAP) {
+ // The integer is picked from a string-to-integer map.
+ //
+ // Reject a too long value string so that the memcmp()
+ // in the loop below won't read past the end of the
+ // string in optmap[i].u.map[j].name.
+ if (value_len > NAME_LEN_MAX)
+ return "Invalid option value";
+
+ const name_value_map *map = optmap[i].u.map;
+ size_t j = 0;
+ while (true) {
+ // The array is terminated with an empty name.
+ if (map[j].name[0] == '\0')
+ return "Invalid option value";
+
+ if (memcmp(*str, map[j].name, value_len) == 0
+ && map[j].name[value_len]
+ == '\0') {
+ v = map[j].value;
+ break;
+ }
+
+ ++j;
+ }
+ } else if (**str < '0' || **str > '9') {
+ // Note that "max" isn't supported while it is
+ // supported in xz. It's not useful here.
+ return "Value is not a non-negative decimal integer";
+ } else {
+ // strtoul() has locale-specific behavior so it cannot
+ // be relied on to get reproducible results since we
+ // cannot change the locale in a thread-safe library.
+ // It also needs '\0'-termination.
+ //
+ // Use a temporary pointer so that *str will point
+ // to the beginning of the value string in case
+ // an error occurs.
+ const char *p = *str;
+ v = 0;
+ do {
+ // Both the multiplication and the addition
+ // are checked for uint32_t overflow before
+ // they are done.
+ if (v > UINT32_MAX / 10)
+ return "Value out of range";
+
+ v *= 10;
+
+ const uint32_t add = (uint32_t)(*p - '0');
+ if (UINT32_MAX - add < v)
+ return "Value out of range";
+
+ v += add;
+ ++p;
+ } while (p < name_eq_value_end
+ && *p >= '0' && *p <= '9');
+
+ if (p < name_eq_value_end) {
+ // Remember this position so that it can be
+ // used for error messages that are
+ // specifically about the suffix. (Out of
+ // range values are about the whole value
+ // and those error messages point to the
+ // beginning of the number part,
+ // not to the suffix.)
+ const char *multiplier_start = p;
+
+ // If multiplier suffix shouldn't be used
+ // then don't allow them even if the value
+ // would stay within limits. This is a somewhat
+ // unnecessary check but it rejects silly
+ // things like lzma2:pb=0MiB which xz allows.
+ if ((optmap[i].flags & OPTMAP_USE_BYTE_SUFFIX)
+ == 0) {
+ *str = multiplier_start;
+ return "This option does not support "
+ "any integer suffixes";
+ }
+
+ uint32_t shift;
+
+ switch (*p) {
+ case 'k':
+ case 'K':
+ shift = 10;
+ break;
+
+ case 'm':
+ case 'M':
+ shift = 20;
+ break;
+
+ case 'g':
+ case 'G':
+ shift = 30;
+ break;
+
+ default:
+ *str = multiplier_start;
+ return "Invalid multiplier suffix "
+ "(KiB, MiB, or GiB)";
+ }
+
+ ++p;
+
+ // Allow "M", "Mi", "MB", "MiB" and the same
+ // for the other five characters from the
+ // switch-statement above. All are handled
+ // as base-2 (perhaps a mistake, perhaps not).
+ // Note that 'i' and 'B' are case sensitive.
+ if (p < name_eq_value_end && *p == 'i')
+ ++p;
+
+ if (p < name_eq_value_end && *p == 'B')
+ ++p;
+
+ // Now we must have no chars remaining.
+ if (p < name_eq_value_end) {
+ *str = multiplier_start;
+ return "Invalid multiplier suffix "
+ "(KiB, MiB, or GiB)";
+ }
+
+ // Applying the multiplier must not overflow
+ // uint32_t.
+ if (v > (UINT32_MAX >> shift))
+ return "Value out of range";
+
+ v <<= shift;
+ }
+
+ if (v < optmap[i].u.range.min
+ || v > optmap[i].u.range.max)
+ return "Value out of range";
+ }
+
+ // Set the value in filter_options. Enums are handled
+ // specially since the underlying type isn't the same
+ // as uint32_t on all systems.
+ void *ptr = (char *)filter_options + optmap[i].offset;
+ switch (optmap[i].type) {
+ case OPTMAP_TYPE_LZMA_MODE:
+ *(lzma_mode *)ptr = (lzma_mode)v;
+ break;
+
+ case OPTMAP_TYPE_LZMA_MATCH_FINDER:
+ *(lzma_match_finder *)ptr = (lzma_match_finder)v;
+ break;
+
+ default:
+ *(uint32_t *)ptr = v;
+ break;
+ }
+
+ // This option has been successfully handled.
+ *str = name_eq_value_end;
+ }
+
+ // No errors.
+ return NULL;
+}
+
+
+/// Parses one filter, that is, a filter name optionally followed by
+/// ':' or '=' and the filter-specific options. The name is looked up from
+/// filter_name_map[] and the matching .parse() function decodes the options.
+///
+/// The caller must have set str_end so that exactly one filter and its
+/// options are present without any trailing characters.
+///
+/// On success, *filter gets the Filter ID and the newly allocated
+/// options, and NULL is returned. On error, *filter isn't modified and
+/// an error message is returned.
+static const char *
+parse_filter(const char **const str, const char *str_end, lzma_filter *filter,
+		const lzma_allocator *allocator, bool only_xz)
+{
+	// The filter name ends at the first colon or equals sign, or at
+	// str_end if the string has only a filter name and no options.
+	const char *name_end = *str;
+	while (name_end < str_end && *name_end != ':' && *name_end != '=')
+		++name_end;
+
+	// Filter options (name1=value1,name2=value2,...) begin after the
+	// delimiter. Without a delimiter the options are an empty string.
+	const char *opts_start = str_end;
+	if (name_end < str_end)
+		opts_start = name_end + 1;
+
+	// A too long name cannot match anything. Rejecting it here also
+	// keeps the comparisons below within filter_name_map[i].name.
+	const size_t name_len = (size_t)(name_end - *str);
+	if (name_len > NAME_LEN_MAX)
+		return "Unknown filter name";
+
+	for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
+		// The name must have the same length and the same bytes.
+		if (filter_name_map[i].name[name_len] != '\0'
+				|| memcmp(*str, filter_name_map[i].name,
+					name_len) != 0)
+			continue;
+
+		if (only_xz && filter_name_map[i].id
+				>= LZMA_FILTER_RESERVED_START)
+			return "This filter cannot be used in "
+					"the .xz format";
+
+		// The filter-specific options start out as all zeros.
+		void *options = lzma_alloc_zero(
+				filter_name_map[i].opts_size, allocator);
+		if (options == NULL)
+			return "Memory allocation failed";
+
+		// The filter name was fine so the input string is good
+		// at least up to the start of the options.
+		*str = opts_start;
+
+		const char *errmsg = filter_name_map[i].parse(
+				str, str_end, options);
+		if (errmsg != NULL) {
+			lzma_free(options, allocator);
+			return errmsg;
+		}
+
+		// *filter is touched only after everything succeeded.
+		filter->options = options;
+		filter->id = filter_name_map[i].id;
+		return NULL;
+	}
+
+	return "Unknown filter name";
+}
+
+
+/// Converts the string to a filter chain (array of lzma_filter structures).
+///
+/// The string is either a preset (like "6" or "-9e") or a list of filters
+/// separated by "--" or spaces.
+///
+/// *str is advanced every time something has been decoded successfully.
+/// This way the caller knows where in the string a possible error occurred.
+///
+/// Returns NULL on success. On error, returns an error message; the
+/// filter options allocated so far are freed and the caller-supplied
+/// filters array is left unmodified.
+static const char *
+str_to_filters(const char **const str, lzma_filter *filters, uint32_t flags,
+ const lzma_allocator *allocator)
+{
+ const char *errmsg;
+
+ // Skip leading spaces.
+ while (**str == ' ')
+ ++*str;
+
+ if (**str == '\0')
+ return "Empty string is not allowed, "
+ "try \"6\" if a default value is needed";
+
+ // Detect the type of the string.
+ //
+ // A string beginning with a digit or a string beginning with
+ // one dash and a digit are treated as presets. Trailing spaces
+ // will be ignored too (leading spaces were already ignored above).
+ //
+ // For example, "6", "7 ", "-9e", or " -3 " are treated as presets.
+ // Strings like "-" or "- " aren't preset.
+#define MY_IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
+ if (MY_IS_DIGIT(**str) || (**str == '-' && MY_IS_DIGIT((*str)[1]))) {
+ if (**str == '-')
+ ++*str;
+
+ // Ignore trailing spaces.
+ const size_t str_len = strlen(*str);
+ const char *str_end = memchr(*str, ' ', str_len);
+ if (str_end != NULL) {
+ // There is at least one trailing space. Check that
+ // there are no chars other than spaces.
+ // (str_end[0] is the space that memchr() found.)
+ for (size_t i = 1; str_end[i] != '\0'; ++i)
+ if (str_end[i] != ' ')
+ return "Unsupported preset";
+ } else {
+ // There are no trailing spaces. Use the whole string.
+ str_end = *str + str_len;
+ }
+
+ uint32_t preset;
+ errmsg = parse_lzma12_preset(str, str_end, &preset);
+ if (errmsg != NULL)
+ return errmsg;
+
+ lzma_options_lzma *opts = lzma_alloc(sizeof(*opts), allocator);
+ if (opts == NULL)
+ return "Memory allocation failed";
+
+ if (lzma_lzma_preset(opts, preset)) {
+ lzma_free(opts, allocator);
+ return "Unsupported preset";
+ }
+
+ // A preset results in a chain that has only LZMA2 in it.
+ filters[0].id = LZMA_FILTER_LZMA2;
+ filters[0].options = opts;
+ filters[1].id = LZMA_VLI_UNKNOWN;
+ filters[1].options = NULL;
+
+ return NULL;
+ }
+
+ // Not a preset so it must be a filter chain.
+ //
+ // If LZMA_STR_ALL_FILTERS isn't used we allow only filters that
+ // can be used in .xz.
+ const bool only_xz = (flags & LZMA_STR_ALL_FILTERS) == 0;
+
+ // Use a temporary array so that we don't modify the caller-supplied
+ // one until we know that no errors occurred.
+ lzma_filter temp_filters[LZMA_FILTERS_MAX + 1];
+
+ // Number of filters successfully parsed into temp_filters so far.
+ size_t i = 0;
+ do {
+ if (i == LZMA_FILTERS_MAX) {
+ errmsg = "The maximum number of filters is four";
+ goto error;
+ }
+
+ // Skip "--" if present.
+ if ((*str)[0] == '-' && (*str)[1] == '-')
+ *str += 2;
+
+ // Locate the end of "filter:name1=value1,name2=value2",
+ // stopping at the first "--" or a single space.
+ const char *filter_end = *str;
+ while (filter_end[0] != '\0') {
+ if ((filter_end[0] == '-' && filter_end[1] == '-')
+ || filter_end[0] == ' ')
+ break;
+
+ ++filter_end;
+ }
+
+ // Inputs that have "--" at the end or "-- " in the middle
+ // will result in an empty filter name.
+ if (filter_end == *str) {
+ errmsg = "Filter name is missing";
+ goto error;
+ }
+
+ errmsg = parse_filter(str, filter_end, &temp_filters[i],
+ allocator, only_xz);
+ if (errmsg != NULL)
+ goto error;
+
+ // Skip trailing spaces.
+ while (**str == ' ')
+ ++*str;
+
+ ++i;
+ } while (**str != '\0');
+
+ // Seems to be good, terminate the array so that
+ // basic validation can be done.
+ temp_filters[i].id = LZMA_VLI_UNKNOWN;
+ temp_filters[i].options = NULL;
+
+ // Do basic validation if the application didn't prohibit it.
+ if ((flags & LZMA_STR_NO_VALIDATION) == 0) {
+ size_t dummy;
+ const lzma_ret ret = lzma_validate_chain(temp_filters, &dummy);
+ assert(ret == LZMA_OK || ret == LZMA_OPTIONS_ERROR);
+ if (ret != LZMA_OK) {
+ errmsg = "Invalid filter chain "
+ "('lzma2' missing at the end?)";
+ goto error;
+ }
+ }
+
+ // All good. Copy the filters to the application supplied array.
+ // The terminating element is copied too, thus i + 1.
+ memcpy(filters, temp_filters, (i + 1) * sizeof(lzma_filter));
+ return NULL;
+
+error:
+ // Free the filter options that were successfully decoded.
+ while (i-- > 0)
+ lzma_free(temp_filters[i].options, allocator);
+
+ return errmsg;
+}
+
+
+/// \brief      Convert a filter chain string to a lzma_filter array
+///
+/// Validates the arguments and then lets str_to_filters() do the parsing.
+///
+/// \param      str         Filter chain string to parse; must not be NULL.
+/// \param[out] error_pos   If not NULL, *error_pos is set on every return
+///                         path: 0 when the arguments are rejected before
+///                         any parsing is done, otherwise the offset in str
+///                         up to which str_to_filters() advanced (clamped
+///                         to INT_MAX).
+/// \param[out] filters     Destination array; must not be NULL.
+/// \param      flags       Bitwise-or of LZMA_STR_ALL_FILTERS and
+///                         LZMA_STR_NO_VALIDATION; other bits are rejected.
+/// \param      allocator   Passed through to str_to_filters().
+///
+/// \return     NULL on success, otherwise a pointer to an error message.
+extern LZMA_API(const char *)
+lzma_str_to_filters(const char *str, int *error_pos, lzma_filter *filters,
+		uint32_t flags, const lzma_allocator *allocator)
+{
+	// A caller may read *error_pos after any error. Give it a defined
+	// value before the early returns below so that it is never left
+	// uninitialized.
+	if (error_pos != NULL)
+		*error_pos = 0;
+
+	if (str == NULL || filters == NULL)
+		return "Unexpected NULL pointer argument(s) "
+				"to lzma_str_to_filters()";
+
+	// Validate the flags.
+	const uint32_t supported_flags
+			= LZMA_STR_ALL_FILTERS
+			| LZMA_STR_NO_VALIDATION;
+
+	if (flags & ~supported_flags)
+		return "Unsupported flags to lzma_str_to_filters()";
+
+	// str_to_filters() advances "used" as it consumes the string, so
+	// the distance from str tells how far the parsing got.
+	const char *used = str;
+	const char *errmsg = str_to_filters(&used, filters, flags, allocator);
+
+	if (error_pos != NULL) {
+		const size_t n = (size_t)(used - str);
+		*error_pos = n > INT_MAX ? INT_MAX : (int)n;
+	}
+
+	return errmsg;
+}
+
+
+/// Appends the options of a single filter to the destination string.
+///
+/// The filter name is expected to be in dest already. No delimiter
+/// (':' or '=') has been written after the name because it is possible
+/// that no option gets printed at all; the delimiter is emitted here
+/// right before the first option that is printed.
+///
+/// \param      dest            String being built
+/// \param      delimiter       Text to put before the first printed option
+/// \param      optmap          Option descriptions of the filter
+/// \param      optmap_count    Number of entries to use from optmap
+/// \param      filter_options  Filter-specific options structure
+static void
+strfy_filter(lzma_str *dest, const char *delimiter,
+		const option_map *optmap, size_t optmap_count,
+		const void *filter_options)
+{
+	const char *separator = delimiter;
+
+	for (size_t idx = 0; idx < optmap_count; ++idx) {
+		const option_map *opt = &optmap[idx];
+
+		// The LZMA1/2 preset is never converted back
+		// to the preset=PRESET form.
+		if (opt->type == OPTMAP_TYPE_LZMA_PRESET)
+			continue;
+
+		// Every option is read as an integer. Only the C type
+		// of the field in the options structure varies.
+		const void *field
+				= (const char *)filter_options + opt->offset;
+		uint32_t value;
+		if (opt->type == OPTMAP_TYPE_LZMA_MODE)
+			value = *(const lzma_mode *)field;
+		else if (opt->type == OPTMAP_TYPE_LZMA_MATCH_FINDER)
+			value = *(const lzma_match_finder *)field;
+		else
+			value = *(const uint32_t *)field;
+
+		// Options flagged with OPTMAP_NO_STRFY_ZERO are left out
+		// of the string when their value is zero.
+		if ((opt->flags & OPTMAP_NO_STRFY_ZERO) && value == 0)
+			continue;
+
+		// The caller-provided delimiter goes before the first
+		// printed option, a comma before all the later ones.
+		str_append_str(dest, separator);
+		separator = ",";
+
+		// "name="
+		str_append_str(dest, opt->name);
+		str_append_str(dest, "=");
+
+		// Plain integers are printed as is.
+		if (!(opt->flags & OPTMAP_USE_NAME_VALUE_MAP)) {
+			str_append_u32(dest, value,
+					opt->flags & OPTMAP_USE_BYTE_SUFFIX);
+			continue;
+		}
+
+		// Otherwise find the name of the value. The map ends at
+		// the entry whose name is an empty string; if the value
+		// isn't found before that, "UNKNOWN" is printed.
+		const char *value_name = "UNKNOWN";
+		for (const name_value_map *entry = opt->u.map;
+				entry->name[0] != '\0'; ++entry) {
+			if (entry->value == value) {
+				value_name = entry->name;
+				break;
+			}
+		}
+
+		str_append_str(dest, value_name);
+	}
+}
+
+
+/// \brief      Convert a filter chain (lzma_filter array) to a string
+///
+/// \param[out] output_str  Set to the allocated string on success and
+///                         to NULL on every error.
+/// \param      filters     Array terminated with LZMA_VLI_UNKNOWN
+/// \param      flags       Bitwise-or of LZMA_STR_ENCODER,
+///                         LZMA_STR_DECODER, LZMA_STR_GETOPT_LONG,
+///                         and LZMA_STR_NO_SPACES
+/// \param      allocator   Allocator used for the string
+///
+/// \return     LZMA_PROG_ERROR if output_str or filters is NULL,
+///             LZMA_OPTIONS_ERROR for unsupported flags, chains or filters,
+///             otherwise whatever str_init() or str_finish() returns.
+extern LZMA_API(lzma_ret)
+lzma_str_from_filters(char **output_str, const lzma_filter *filters,
+		uint32_t flags, const lzma_allocator *allocator)
+{
+	// *output_str has to be NULL on every error return,
+	// so clear it before doing anything else.
+	if (output_str == NULL)
+		return LZMA_PROG_ERROR;
+
+	*output_str = NULL;
+
+	if (filters == NULL)
+		return LZMA_PROG_ERROR;
+
+	// Reject flags that this function doesn't know about.
+	const uint32_t known_flags = LZMA_STR_ENCODER | LZMA_STR_DECODER
+			| LZMA_STR_GETOPT_LONG | LZMA_STR_NO_SPACES;
+	if ((flags & ~known_flags) != 0)
+		return LZMA_OPTIONS_ERROR;
+
+	// An empty chain cannot be converted.
+	if (filters[0].id == LZMA_VLI_UNKNOWN)
+		return LZMA_OPTIONS_ERROR;
+
+	// Allocate memory for the output string.
+	lzma_str dest;
+	return_if_error(str_init(&dest, allocator));
+
+	const bool show_opts
+			= (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)) != 0;
+	const bool long_opt_syntax = (flags & LZMA_STR_GETOPT_LONG) != 0;
+	const bool no_spaces = (flags & LZMA_STR_NO_SPACES) != 0;
+	const char *opt_delim = long_opt_syntax ? "=" : ":";
+
+	for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
+		// Index LZMA_FILTERS_MAX has to hold the terminator.
+		// Since the ID isn't LZMA_VLI_UNKNOWN here,
+		// the array has too many filters.
+		if (i == LZMA_FILTERS_MAX)
+			goto error;
+
+		// Filters are separated with a space unless
+		// the caller didn't want spaces.
+		if (i > 0 && !no_spaces)
+			str_append_str(&dest, " ");
+
+		// Dashes are used for the xz getopt_long() compatible
+		// syntax and also as the filter separator when spaces
+		// weren't wanted.
+		if (long_opt_syntax || (i > 0 && no_spaces))
+			str_append_str(&dest, "--");
+
+		// Find the filter from the name map.
+		size_t j = 0;
+		while (j < ARRAY_SIZE(filter_name_map)
+				&& filter_name_map[j].id != filters[i].id)
+			++j;
+
+		// Filter ID in filters[i].id isn't supported.
+		if (j == ARRAY_SIZE(filter_name_map))
+			goto error;
+
+		str_append_str(&dest, filter_name_map[j].name);
+
+		// With names only, .options is ignored and may be NULL
+		// even if the filter doesn't allow NULL options.
+		if (!show_opts)
+			continue;
+
+		if (filters[i].options == NULL) {
+			// The options structure is mandatory
+			// with some filters.
+			if (!filter_name_map[j].allow_null)
+				goto error;
+
+			// NULL is fine with this filter and
+			// there is nothing to add to the string.
+			continue;
+		}
+
+		// The options structure is available
+		// so add the options to the string.
+		const size_t optmap_count = (flags & LZMA_STR_ENCODER)
+				? filter_name_map[j].strfy_encoder
+				: filter_name_map[j].strfy_decoder;
+		strfy_filter(&dest, opt_delim, filter_name_map[j].optmap,
+				optmap_count, filters[i].options);
+	}
+
+	return str_finish(output_str, &dest, allocator);
+
+error:
+	str_free(&dest, allocator);
+	return LZMA_OPTIONS_ERROR;
+}
+
+
+/// \brief      List the supported filters and, optionally, their options
+///
+/// \param[out] output_str  Set to the allocated string on success and
+///                         to NULL on every error.
+/// \param      filter_id   ID of the filter to list or LZMA_VLI_UNKNOWN
+///                         to list all the filters
+/// \param      flags       Bitwise-or of LZMA_STR_ALL_FILTERS,
+///                         LZMA_STR_ENCODER, LZMA_STR_DECODER,
+///                         and LZMA_STR_GETOPT_LONG
+/// \param      allocator   Allocator used for the string
+///
+/// \return     LZMA_PROG_ERROR if output_str is NULL, LZMA_OPTIONS_ERROR
+///             for unsupported flags or Filter ID, otherwise whatever
+///             str_init() or str_finish() returns.
+extern LZMA_API(lzma_ret)
+lzma_str_list_filters(char **output_str, lzma_vli filter_id, uint32_t flags,
+		const lzma_allocator *allocator)
+{
+	// *output_str has to be NULL on every error return,
+	// so clear it before doing anything else.
+	if (output_str == NULL)
+		return LZMA_PROG_ERROR;
+
+	*output_str = NULL;
+
+	// Reject flags that this function doesn't know about.
+	const uint32_t known_flags = LZMA_STR_ALL_FILTERS | LZMA_STR_ENCODER
+			| LZMA_STR_DECODER | LZMA_STR_GETOPT_LONG;
+	if ((flags & ~known_flags) != 0)
+		return LZMA_OPTIONS_ERROR;
+
+	// Allocate memory for the output string.
+	lzma_str dest;
+	return_if_error(str_init(&dest, allocator));
+
+	const bool show_opts
+			= (flags & (LZMA_STR_ENCODER | LZMA_STR_DECODER)) != 0;
+	const bool long_opt_syntax = (flags & LZMA_STR_GETOPT_LONG) != 0;
+	const bool list_all = filter_id == LZMA_VLI_UNKNOWN;
+
+	// Plain filter names are separated with spaces. When the options
+	// are shown too, each filter goes on its own line.
+	const char *filter_delim = show_opts ? "\n" : " ";
+	const char *opt_delim = long_opt_syntax ? "=" : ":";
+
+	size_t printed = 0;
+
+	for (size_t i = 0; i < ARRAY_SIZE(filter_name_map); ++i) {
+		if (list_all) {
+			// Filters that aren't for .xz are listed
+			// only with LZMA_STR_ALL_FILTERS.
+			if (filter_name_map[i].id
+						>= LZMA_FILTER_RESERVED_START
+					&& !(flags & LZMA_STR_ALL_FILTERS))
+				continue;
+		} else if (filter_name_map[i].id != filter_id) {
+			// Only one filter was asked for and this isn't it.
+			continue;
+		}
+
+		// Delimiter is needed before all but the first filter.
+		if (printed > 0)
+			str_append_str(&dest, filter_delim);
+
+		++printed;
+
+		if (long_opt_syntax)
+			str_append_str(&dest, "--");
+
+		str_append_str(&dest, filter_name_map[i].name);
+
+		// Nothing more to add if only the names were wanted.
+		if (!show_opts)
+			continue;
+
+		const size_t opt_count = (flags & LZMA_STR_ENCODER)
+				? filter_name_map[i].strfy_encoder
+				: filter_name_map[i].strfy_decoder;
+
+		// "=" or ":" comes between the filter name and the first
+		// option, "," between the rest of the options.
+		const char *separator = opt_delim;
+
+		for (size_t j = 0; j < opt_count; ++j) {
+			const option_map *opt = &filter_name_map[i].optmap[j];
+
+			str_append_str(&dest, separator);
+			separator = ",";
+
+			// optname=<possible_values>
+			str_append_str(&dest, opt->name);
+			str_append_str(&dest, "=<");
+
+			if (opt->type == OPTMAP_TYPE_LZMA_PRESET) {
+				// LZMA1/2 preset has a help string of its own.
+				str_append_str(&dest, LZMA12_PRESET_STR);
+
+			} else if (opt->flags & OPTMAP_USE_NAME_VALUE_MAP) {
+				// Value names are separated with "|". The map
+				// ends at the entry with an empty name.
+				const name_value_map *names = opt->u.map;
+				for (size_t k = 0; names[k].name[0] != '\0';
+						++k) {
+					if (k != 0)
+						str_append_str(&dest, "|");
+
+					str_append_str(&dest, names[k].name);
+				}
+
+			} else {
+				// Integer range in the form min-max
+				const bool use_byte_suffix = (opt->flags
+						& OPTMAP_USE_BYTE_SUFFIX) != 0;
+				str_append_u32(&dest, opt->u.range.min,
+						use_byte_suffix);
+				str_append_str(&dest, "-");
+				str_append_u32(&dest, opt->u.range.max,
+						use_byte_suffix);
+			}
+
+			str_append_str(&dest, ">");
+		}
+	}
+
+	// Nothing was printed which means that the Filter ID given
+	// by the caller isn't supported.
+	if (printed == 0) {
+		str_free(&dest, allocator);
+		return LZMA_OPTIONS_ERROR;
+	}
+
+	return str_finish(output_str, &dest, allocator);
+}
diff --git a/src/liblzma/common/vli_decoder.c b/src/liblzma/common/vli_decoder.c
new file mode 100644
index 0000000..af2799d
--- /dev/null
+++ b/src/liblzma/common/vli_decoder.c
@@ -0,0 +1,86 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file vli_decoder.c
+/// \brief Decodes variable-length integers
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+/// \brief      Decode a variable-length integer
+///
+/// Each input byte carries seven bits of the integer, lowest bits first.
+/// The highest bit of a byte is set if more bytes follow.
+///
+/// \param[out] vli       Decoded value. It is zeroed when decoding of
+///                       a new integer starts.
+/// \param      vli_pos   NULL selects single-call mode. Otherwise it
+///                       points to the number of bytes decoded so far
+///                       (zero when starting a new integer) and it is
+///                       updated as bytes are consumed.
+/// \param      in        Input buffer
+/// \param      in_pos    Read position in the input buffer; advanced
+///                       by the number of bytes consumed.
+/// \param      in_size   Size of the input buffer
+///
+/// \return     Single-call mode: LZMA_OK when an integer was decoded;
+///             LZMA_DATA_ERROR if there is no input, the input ends before
+///             the integer does, or the encoding is invalid.
+///             Multi-call mode: LZMA_STREAM_END when the integer is
+///             complete; LZMA_OK when more input is needed; LZMA_BUF_ERROR
+///             if no input was available; LZMA_PROG_ERROR if *vli_pos or
+///             *vli is inconsistent; LZMA_DATA_ERROR if the encoding is
+///             invalid.
+extern LZMA_API(lzma_ret)
+lzma_vli_decode(lzma_vli *restrict vli, size_t *vli_pos,
+		const uint8_t *restrict in, size_t *restrict in_pos,
+		size_t in_size)
+{
+	// If we haven't been given vli_pos, work in single-call mode.
+	size_t vli_pos_internal = 0;
+	if (vli_pos == NULL) {
+		vli_pos = &vli_pos_internal;
+		*vli = 0;
+
+		// If there's no input, use LZMA_DATA_ERROR. This way it is
+		// easy to decode VLIs from buffers that have known size,
+		// and get the correct error code in case the buffer is
+		// too short.
+		if (*in_pos >= in_size)
+			return LZMA_DATA_ERROR;
+
+	} else {
+		// Initialize *vli when starting to decode a new integer.
+		if (*vli_pos == 0)
+			*vli = 0;
+
+		// Validate the arguments: the position must be in range
+		// and *vli must not have bits set above the ones that
+		// have been decoded so far.
+		if (*vli_pos >= LZMA_VLI_BYTES_MAX
+				|| (*vli >> (*vli_pos * 7)) != 0)
+			return LZMA_PROG_ERROR;
+
+		if (*in_pos >= in_size)
+			return LZMA_BUF_ERROR;
+	}
+
+	do {
+		// Read the next byte. Use a temporary variable so that we
+		// can update *in_pos immediately.
+		const uint8_t byte = in[*in_pos];
+		++*in_pos;
+
+		// Add the newly read byte to *vli.
+		*vli += (lzma_vli)(byte & 0x7F) << (*vli_pos * 7);
+		++*vli_pos;
+
+		// Check if this is the last byte of a multibyte integer.
+		if ((byte & 0x80) == 0) {
+			// We don't allow using variable-length integers as
+			// padding i.e. the encoding must use the most
+			// compact form.
+			if (byte == 0x00 && *vli_pos > 1)
+				return LZMA_DATA_ERROR;
+
+			return vli_pos == &vli_pos_internal
+					? LZMA_OK : LZMA_STREAM_END;
+		}
+
+		// There is at least one more byte coming. If we have already
+		// read maximum number of bytes, the integer is considered
+		// corrupt.
+		//
+		// If we need bigger integers in future, old versions liblzma
+		// will confusingly indicate the file being corrupt instead of
+		// unsupported. I suppose it's still better this way, because
+		// in the foreseeable future (writing this in 2008) the only
+		// reason why files would appear having over 63-bit integers
+		// is that the files are simply corrupt.
+		if (*vli_pos == LZMA_VLI_BYTES_MAX)
+			return LZMA_DATA_ERROR;
+
+	} while (*in_pos < in_size);
+
+	// The input ended in the middle of the integer. In single-call
+	// mode that is an error; in multi-call mode the caller is expected
+	// to call again with more input.
+	return vli_pos == &vli_pos_internal ? LZMA_DATA_ERROR : LZMA_OK;
+}
diff --git a/src/liblzma/common/vli_encoder.c b/src/liblzma/common/vli_encoder.c
new file mode 100644
index 0000000..f864269
--- /dev/null
+++ b/src/liblzma/common/vli_encoder.c
@@ -0,0 +1,69 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file vli_encoder.c
+/// \brief Encodes variable-length integers
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+/// \brief      Encode a variable-length integer
+///
+/// \param      vli       Integer to encode
+/// \param      vli_pos   NULL selects single-call mode. Otherwise it
+///                       points to the number of bytes encoded so far
+///                       and it is updated as bytes are written.
+/// \param      out       Output buffer
+/// \param      out_pos   Write position in the output buffer; advanced
+///                       by the number of bytes written.
+/// \param      out_size  Size of the output buffer
+///
+/// \return     Single-call mode: LZMA_OK when the whole integer was
+///             written; LZMA_PROG_ERROR if the output space ran out or
+///             the arguments are invalid.
+///             Multi-call mode: LZMA_STREAM_END when the last byte was
+///             written; LZMA_OK when the output became full before that;
+///             LZMA_BUF_ERROR if there was no output space to begin with;
+///             LZMA_PROG_ERROR if the arguments are invalid.
+extern LZMA_API(lzma_ret)
+lzma_vli_encode(lzma_vli vli, size_t *vli_pos,
+		uint8_t *restrict out, size_t *restrict out_pos,
+		size_t out_size)
+{
+	// The return codes depend on the mode, so pick them up front.
+	size_t single_call_pos = 0;
+	lzma_ret ret_no_space;
+	lzma_ret ret_out_full;
+	lzma_ret ret_finished;
+
+	if (vli_pos == NULL) {
+		// Single-call mode: the caller is expected to have
+		// reserved enough output space, thus running out of it
+		// is a programming error.
+		vli_pos = &single_call_pos;
+		ret_no_space = LZMA_PROG_ERROR;
+		ret_out_full = LZMA_PROG_ERROR;
+		ret_finished = LZMA_OK;
+	} else {
+		// Multi-call mode: liblzma itself never calls this without
+		// output space but an application might.
+		ret_no_space = LZMA_BUF_ERROR;
+		ret_out_full = LZMA_OK;
+		ret_finished = LZMA_STREAM_END;
+	}
+
+	if (*out_pos >= out_size)
+		return ret_no_space;
+
+	// Validate the arguments.
+	if (vli > LZMA_VLI_MAX || *vli_pos >= LZMA_VLI_BYTES_MAX)
+		return LZMA_PROG_ERROR;
+
+	// Drop the bits that have been encoded already. In single-call
+	// mode *vli_pos is zero and thus vli stays as is.
+	vli >>= *vli_pos * 7;
+
+	// Every byte except the last one has the highest bit set.
+	for (; vli >= 0x80; vli >>= 7) {
+		// *vli_pos isn't read again during this call. It is kept
+		// up to date in case we have to return before the whole
+		// integer has been encoded.
+		++*vli_pos;
+		assert(*vli_pos < LZMA_VLI_BYTES_MAX);
+
+		out[*out_pos] = (uint8_t)(vli) | 0x80;
+		++*out_pos;
+
+		if (*out_pos == out_size)
+			return ret_out_full;
+	}
+
+	// The last byte has the highest bit unset.
+	out[*out_pos] = (uint8_t)(vli);
+	++*out_pos;
+	++*vli_pos;
+
+	return ret_finished;
+}
diff --git a/src/liblzma/common/vli_size.c b/src/liblzma/common/vli_size.c
new file mode 100644
index 0000000..ec1b4fa
--- /dev/null
+++ b/src/liblzma/common/vli_size.c
@@ -0,0 +1,30 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file vli_size.c
+/// \brief Calculates the encoded size of a variable-length integer
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "common.h"
+
+
+/// \brief      Get the number of bytes needed to encode an integer
+///
+/// \param      vli     Integer whose encoded size is wanted
+///
+/// \return     Number of bytes, one for each started group of seven
+///             bits in vli (at least one), or zero if vli is greater
+///             than LZMA_VLI_MAX.
+extern LZMA_API(uint32_t)
+lzma_vli_size(lzma_vli vli)
+{
+	if (vli > LZMA_VLI_MAX)
+		return 0;
+
+	// Even zero takes one byte. Every further
+	// non-empty group of seven bits adds one more.
+	uint32_t bytes = 1;
+	for (vli >>= 7; vli != 0; vli >>= 7)
+		++bytes;
+
+	assert(bytes <= LZMA_VLI_BYTES_MAX);
+	return bytes;
+}