summaryrefslogtreecommitdiffstats
path: root/src/liblzma/simple
diff options
context:
space:
mode:
Diffstat (limited to 'src/liblzma/simple')
-rw-r--r--src/liblzma/simple/Makefile.inc51
-rw-r--r--src/liblzma/simple/arm.c75
-rw-r--r--src/liblzma/simple/arm64.c137
-rw-r--r--src/liblzma/simple/armthumb.c80
-rw-r--r--src/liblzma/simple/ia64.c116
-rw-r--r--src/liblzma/simple/powerpc.c80
-rw-r--r--src/liblzma/simple/simple_coder.c292
-rw-r--r--src/liblzma/simple/simple_coder.h81
-rw-r--r--src/liblzma/simple/simple_decoder.c40
-rw-r--r--src/liblzma/simple/simple_decoder.h22
-rw-r--r--src/liblzma/simple/simple_encoder.c38
-rw-r--r--src/liblzma/simple/simple_encoder.h23
-rw-r--r--src/liblzma/simple/simple_private.h74
-rw-r--r--src/liblzma/simple/sparc.c87
-rw-r--r--src/liblzma/simple/x86.c163
15 files changed, 1359 insertions, 0 deletions
diff --git a/src/liblzma/simple/Makefile.inc b/src/liblzma/simple/Makefile.inc
new file mode 100644
index 0000000..dc092f9
--- /dev/null
+++ b/src/liblzma/simple/Makefile.inc
@@ -0,0 +1,51 @@
+##
+## Author: Lasse Collin
+##
+## This file has been put into the public domain.
+## You can do whatever you want with this file.
+##
+
+liblzma_la_SOURCES += \
+ simple/simple_coder.c \
+ simple/simple_coder.h \
+ simple/simple_private.h
+
+if COND_ENCODER_SIMPLE
+liblzma_la_SOURCES += \
+ simple/simple_encoder.c \
+ simple/simple_encoder.h
+endif
+
+if COND_DECODER_SIMPLE
+liblzma_la_SOURCES += \
+ simple/simple_decoder.c \
+ simple/simple_decoder.h
+endif
+
+if COND_FILTER_X86
+liblzma_la_SOURCES += simple/x86.c
+endif
+
+if COND_FILTER_POWERPC
+liblzma_la_SOURCES += simple/powerpc.c
+endif
+
+if COND_FILTER_IA64
+liblzma_la_SOURCES += simple/ia64.c
+endif
+
+if COND_FILTER_ARM
+liblzma_la_SOURCES += simple/arm.c
+endif
+
+if COND_FILTER_ARMTHUMB
+liblzma_la_SOURCES += simple/armthumb.c
+endif
+
+if COND_FILTER_ARM64
+liblzma_la_SOURCES += simple/arm64.c
+endif
+
+if COND_FILTER_SPARC
+liblzma_la_SOURCES += simple/sparc.c
+endif
diff --git a/src/liblzma/simple/arm.c b/src/liblzma/simple/arm.c
new file mode 100644
index 0000000..6e53970
--- /dev/null
+++ b/src/liblzma/simple/arm.c
@@ -0,0 +1,75 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file arm.c
+/// \brief Filter for ARM binaries
+///
+// Authors: Igor Pavlov
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_private.h"
+
+
+static size_t
+arm_code(void *simple lzma_attribute((__unused__)),
+ uint32_t now_pos, bool is_encoder,
+ uint8_t *buffer, size_t size)
+{
+ size_t i;
+ for (i = 0; i + 4 <= size; i += 4) {
+ if (buffer[i + 3] == 0xEB) {
+ uint32_t src = ((uint32_t)(buffer[i + 2]) << 16)
+ | ((uint32_t)(buffer[i + 1]) << 8)
+ | (uint32_t)(buffer[i + 0]);
+ src <<= 2;
+
+ uint32_t dest;
+ if (is_encoder)
+ dest = now_pos + (uint32_t)(i) + 8 + src;
+ else
+ dest = src - (now_pos + (uint32_t)(i) + 8);
+
+ dest >>= 2;
+ buffer[i + 2] = (dest >> 16);
+ buffer[i + 1] = (dest >> 8);
+ buffer[i + 0] = dest;
+ }
+ }
+
+ return i;
+}
+
+
+static lzma_ret
+arm_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter_info *filters, bool is_encoder)
+{
+ return lzma_simple_coder_init(next, allocator, filters,
+ &arm_code, 0, 4, 4, is_encoder);
+}
+
+
+#ifdef HAVE_ENCODER_ARM
+extern lzma_ret
+lzma_simple_arm_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return arm_coder_init(next, allocator, filters, true);
+}
+#endif
+
+
+#ifdef HAVE_DECODER_ARM
+extern lzma_ret
+lzma_simple_arm_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return arm_coder_init(next, allocator, filters, false);
+}
+#endif
diff --git a/src/liblzma/simple/arm64.c b/src/liblzma/simple/arm64.c
new file mode 100644
index 0000000..0fe0824
--- /dev/null
+++ b/src/liblzma/simple/arm64.c
@@ -0,0 +1,137 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file arm64.c
+/// \brief Filter for ARM64 binaries
+///
+/// This converts ARM64 relative addresses in the BL and ADRP immediates
+/// to absolute values to increase redundancy of ARM64 code.
+///
+/// Converting B or ADR instructions was also tested but it's not useful.
+/// A majority of the jumps for the B instruction are very small (+/- 0xFF).
+/// These are typical for loops and if-statements. Encoding them to their
+/// absolute address reduces redundancy since many of the small relative
+/// jump values are repeated, but very few of the absolute addresses are.
+//
+// Authors: Lasse Collin
+// Jia Tan
+// Igor Pavlov
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_private.h"
+
+
+static size_t
+arm64_code(void *simple lzma_attribute((__unused__)),
+ uint32_t now_pos, bool is_encoder,
+ uint8_t *buffer, size_t size)
+{
+ size_t i;
+
+ // Clang 14.0.6 on x86-64 makes this four times bigger and 40 % slower
+ // with auto-vectorization that is enabled by default with -O2.
+ // Such vectorization bloat happens with -O2 when targeting ARM64 too
+ // but performance hasn't been tested.
+#ifdef __clang__
+# pragma clang loop vectorize(disable)
+#endif
+ for (i = 0; i + 4 <= size; i += 4) {
+ uint32_t pc = (uint32_t)(now_pos + i);
+ uint32_t instr = read32le(buffer + i);
+
+ if ((instr >> 26) == 0x25) {
+ // BL instruction:
+ // The full 26-bit immediate is converted.
+ // The range is +/-128 MiB.
+ //
+ // Using the full range is helps quite a lot with
+ // big executables. Smaller range would reduce false
+ // positives in non-code sections of the input though
+ // so this is a compromise that slightly favors big
+ // files. With the full range only six bits of the 32
+ // need to match to trigger a conversion.
+ const uint32_t src = instr;
+ instr = 0x94000000;
+
+ pc >>= 2;
+ if (!is_encoder)
+ pc = 0U - pc;
+
+ instr |= (src + pc) & 0x03FFFFFF;
+ write32le(buffer + i, instr);
+
+ } else if ((instr & 0x9F000000) == 0x90000000) {
+ // ADRP instruction:
+ // Only values in the range +/-512 MiB are converted.
+ //
+ // Using less than the full +/-4 GiB range reduces
+ // false positives on non-code sections of the input
+ // while being excellent for executables up to 512 MiB.
+ // The positive effect of ADRP conversion is smaller
+ // than that of BL but it also doesn't hurt so much in
+ // non-code sections of input because, with +/-512 MiB
+ // range, nine bits of 32 need to match to trigger a
+ // conversion (two 10-bit match choices = 9 bits).
+ const uint32_t src = ((instr >> 29) & 3)
+ | ((instr >> 3) & 0x001FFFFC);
+
+ // With the addition only one branch is needed to
+ // check the +/- range. This is usually false when
+ // processing ARM64 code so branch prediction will
+ // handle it well in terms of performance.
+ //
+ //if ((src & 0x001E0000) != 0
+ // && (src & 0x001E0000) != 0x001E0000)
+ if ((src + 0x00020000) & 0x001C0000)
+ continue;
+
+ instr &= 0x9000001F;
+
+ pc >>= 12;
+ if (!is_encoder)
+ pc = 0U - pc;
+
+ const uint32_t dest = src + pc;
+ instr |= (dest & 3) << 29;
+ instr |= (dest & 0x0003FFFC) << 3;
+ instr |= (0U - (dest & 0x00020000)) & 0x00E00000;
+ write32le(buffer + i, instr);
+ }
+ }
+
+ return i;
+}
+
+
+static lzma_ret
+arm64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter_info *filters, bool is_encoder)
+{
+ return lzma_simple_coder_init(next, allocator, filters,
+ &arm64_code, 0, 4, 4, is_encoder);
+}
+
+
+#ifdef HAVE_ENCODER_ARM64
+extern lzma_ret
+lzma_simple_arm64_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return arm64_coder_init(next, allocator, filters, true);
+}
+#endif
+
+
+#ifdef HAVE_DECODER_ARM64
+extern lzma_ret
+lzma_simple_arm64_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return arm64_coder_init(next, allocator, filters, false);
+}
+#endif
diff --git a/src/liblzma/simple/armthumb.c b/src/liblzma/simple/armthumb.c
new file mode 100644
index 0000000..25d8dbd
--- /dev/null
+++ b/src/liblzma/simple/armthumb.c
@@ -0,0 +1,80 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file armthumb.c
+/// \brief Filter for ARM-Thumb binaries
+///
+// Authors: Igor Pavlov
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_private.h"
+
+
+static size_t
+armthumb_code(void *simple lzma_attribute((__unused__)),
+ uint32_t now_pos, bool is_encoder,
+ uint8_t *buffer, size_t size)
+{
+ size_t i;
+ for (i = 0; i + 4 <= size; i += 2) {
+ if ((buffer[i + 1] & 0xF8) == 0xF0
+ && (buffer[i + 3] & 0xF8) == 0xF8) {
+ uint32_t src = (((uint32_t)(buffer[i + 1]) & 7) << 19)
+ | ((uint32_t)(buffer[i + 0]) << 11)
+ | (((uint32_t)(buffer[i + 3]) & 7) << 8)
+ | (uint32_t)(buffer[i + 2]);
+
+ src <<= 1;
+
+ uint32_t dest;
+ if (is_encoder)
+ dest = now_pos + (uint32_t)(i) + 4 + src;
+ else
+ dest = src - (now_pos + (uint32_t)(i) + 4);
+
+ dest >>= 1;
+ buffer[i + 1] = 0xF0 | ((dest >> 19) & 0x7);
+ buffer[i + 0] = (dest >> 11);
+ buffer[i + 3] = 0xF8 | ((dest >> 8) & 0x7);
+ buffer[i + 2] = (dest);
+ i += 2;
+ }
+ }
+
+ return i;
+}
+
+
+static lzma_ret
+armthumb_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter_info *filters, bool is_encoder)
+{
+ return lzma_simple_coder_init(next, allocator, filters,
+ &armthumb_code, 0, 4, 2, is_encoder);
+}
+
+
+#ifdef HAVE_ENCODER_ARMTHUMB
+extern lzma_ret
+lzma_simple_armthumb_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return armthumb_coder_init(next, allocator, filters, true);
+}
+#endif
+
+
+#ifdef HAVE_DECODER_ARMTHUMB
+extern lzma_ret
+lzma_simple_armthumb_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return armthumb_coder_init(next, allocator, filters, false);
+}
+#endif
diff --git a/src/liblzma/simple/ia64.c b/src/liblzma/simple/ia64.c
new file mode 100644
index 0000000..692b0a2
--- /dev/null
+++ b/src/liblzma/simple/ia64.c
@@ -0,0 +1,116 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file ia64.c
+/// \brief Filter for IA64 (Itanium) binaries
+///
+// Authors: Igor Pavlov
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_private.h"
+
+
+static size_t
+ia64_code(void *simple lzma_attribute((__unused__)),
+ uint32_t now_pos, bool is_encoder,
+ uint8_t *buffer, size_t size)
+{
+ static const uint32_t BRANCH_TABLE[32] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 4, 4, 6, 6, 0, 0, 7, 7,
+ 4, 4, 0, 0, 4, 4, 0, 0
+ };
+
+ size_t i;
+ for (i = 0; i + 16 <= size; i += 16) {
+ const uint32_t instr_template = buffer[i] & 0x1F;
+ const uint32_t mask = BRANCH_TABLE[instr_template];
+ uint32_t bit_pos = 5;
+
+ for (size_t slot = 0; slot < 3; ++slot, bit_pos += 41) {
+ if (((mask >> slot) & 1) == 0)
+ continue;
+
+ const size_t byte_pos = (bit_pos >> 3);
+ const uint32_t bit_res = bit_pos & 0x7;
+ uint64_t instruction = 0;
+
+ for (size_t j = 0; j < 6; ++j)
+ instruction += (uint64_t)(
+ buffer[i + j + byte_pos])
+ << (8 * j);
+
+ uint64_t inst_norm = instruction >> bit_res;
+
+ if (((inst_norm >> 37) & 0xF) == 0x5
+ && ((inst_norm >> 9) & 0x7) == 0
+ /* && (inst_norm & 0x3F)== 0 */
+ ) {
+ uint32_t src = (uint32_t)(
+ (inst_norm >> 13) & 0xFFFFF);
+ src |= ((inst_norm >> 36) & 1) << 20;
+
+ src <<= 4;
+
+ uint32_t dest;
+ if (is_encoder)
+ dest = now_pos + (uint32_t)(i) + src;
+ else
+ dest = src - (now_pos + (uint32_t)(i));
+
+ dest >>= 4;
+
+ inst_norm &= ~((uint64_t)(0x8FFFFF) << 13);
+ inst_norm |= (uint64_t)(dest & 0xFFFFF) << 13;
+ inst_norm |= (uint64_t)(dest & 0x100000)
+ << (36 - 20);
+
+ instruction &= (1U << bit_res) - 1;
+ instruction |= (inst_norm << bit_res);
+
+ for (size_t j = 0; j < 6; j++)
+ buffer[i + j + byte_pos] = (uint8_t)(
+ instruction
+ >> (8 * j));
+ }
+ }
+ }
+
+ return i;
+}
+
+
+static lzma_ret
+ia64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter_info *filters, bool is_encoder)
+{
+ return lzma_simple_coder_init(next, allocator, filters,
+ &ia64_code, 0, 16, 16, is_encoder);
+}
+
+
+#ifdef HAVE_ENCODER_IA64
+extern lzma_ret
+lzma_simple_ia64_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return ia64_coder_init(next, allocator, filters, true);
+}
+#endif
+
+
+#ifdef HAVE_DECODER_IA64
+extern lzma_ret
+lzma_simple_ia64_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return ia64_coder_init(next, allocator, filters, false);
+}
+#endif
diff --git a/src/liblzma/simple/powerpc.c b/src/liblzma/simple/powerpc.c
new file mode 100644
index 0000000..3a340fd
--- /dev/null
+++ b/src/liblzma/simple/powerpc.c
@@ -0,0 +1,80 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file powerpc.c
+/// \brief Filter for PowerPC (big endian) binaries
+///
+// Authors: Igor Pavlov
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_private.h"
+
+
+static size_t
+powerpc_code(void *simple lzma_attribute((__unused__)),
+ uint32_t now_pos, bool is_encoder,
+ uint8_t *buffer, size_t size)
+{
+ size_t i;
+ for (i = 0; i + 4 <= size; i += 4) {
+ // PowerPC branch 6(48) 24(Offset) 1(Abs) 1(Link)
+ if ((buffer[i] >> 2) == 0x12
+ && ((buffer[i + 3] & 3) == 1)) {
+
+ const uint32_t src
+ = (((uint32_t)(buffer[i + 0]) & 3) << 24)
+ | ((uint32_t)(buffer[i + 1]) << 16)
+ | ((uint32_t)(buffer[i + 2]) << 8)
+ | ((uint32_t)(buffer[i + 3]) & ~UINT32_C(3));
+
+ uint32_t dest;
+ if (is_encoder)
+ dest = now_pos + (uint32_t)(i) + src;
+ else
+ dest = src - (now_pos + (uint32_t)(i));
+
+ buffer[i + 0] = 0x48 | ((dest >> 24) & 0x03);
+ buffer[i + 1] = (dest >> 16);
+ buffer[i + 2] = (dest >> 8);
+ buffer[i + 3] &= 0x03;
+ buffer[i + 3] |= dest;
+ }
+ }
+
+ return i;
+}
+
+
+static lzma_ret
+powerpc_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter_info *filters, bool is_encoder)
+{
+ return lzma_simple_coder_init(next, allocator, filters,
+ &powerpc_code, 0, 4, 4, is_encoder);
+}
+
+
+#ifdef HAVE_ENCODER_POWERPC
+extern lzma_ret
+lzma_simple_powerpc_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return powerpc_coder_init(next, allocator, filters, true);
+}
+#endif
+
+
+#ifdef HAVE_DECODER_POWERPC
+extern lzma_ret
+lzma_simple_powerpc_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return powerpc_coder_init(next, allocator, filters, false);
+}
+#endif
diff --git a/src/liblzma/simple/simple_coder.c b/src/liblzma/simple/simple_coder.c
new file mode 100644
index 0000000..ed2d7fb
--- /dev/null
+++ b/src/liblzma/simple/simple_coder.c
@@ -0,0 +1,292 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file simple_coder.c
+/// \brief Wrapper for simple filters
+///
+/// Simple filters don't change the size of the data i.e. number of bytes
+/// in equals the number of bytes out.
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_private.h"
+
+
+/// Copied or encodes/decodes more data to out[].
+static lzma_ret
+copy_or_code(lzma_simple_coder *coder, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ assert(!coder->end_was_reached);
+
+ if (coder->next.code == NULL) {
+ lzma_bufcpy(in, in_pos, in_size, out, out_pos, out_size);
+
+ // Check if end of stream was reached.
+ if (coder->is_encoder && action == LZMA_FINISH
+ && *in_pos == in_size)
+ coder->end_was_reached = true;
+
+ } else {
+ // Call the next coder in the chain to provide us some data.
+ const lzma_ret ret = coder->next.code(
+ coder->next.coder, allocator,
+ in, in_pos, in_size,
+ out, out_pos, out_size, action);
+
+ if (ret == LZMA_STREAM_END) {
+ assert(!coder->is_encoder
+ || action == LZMA_FINISH);
+ coder->end_was_reached = true;
+
+ } else if (ret != LZMA_OK) {
+ return ret;
+ }
+ }
+
+ return LZMA_OK;
+}
+
+
+static size_t
+call_filter(lzma_simple_coder *coder, uint8_t *buffer, size_t size)
+{
+ const size_t filtered = coder->filter(coder->simple,
+ coder->now_pos, coder->is_encoder,
+ buffer, size);
+ coder->now_pos += filtered;
+ return filtered;
+}
+
+
+static lzma_ret
+simple_code(void *coder_ptr, const lzma_allocator *allocator,
+ const uint8_t *restrict in, size_t *restrict in_pos,
+ size_t in_size, uint8_t *restrict out,
+ size_t *restrict out_pos, size_t out_size, lzma_action action)
+{
+ lzma_simple_coder *coder = coder_ptr;
+
+ // TODO: Add partial support for LZMA_SYNC_FLUSH. We can support it
+ // in cases when the filter is able to filter everything. With most
+ // simple filters it can be done at offset that is a multiple of 2,
+ // 4, or 16. With x86 filter, it needs good luck, and thus cannot
+ // be made to work predictably.
+ if (action == LZMA_SYNC_FLUSH)
+ return LZMA_OPTIONS_ERROR;
+
+ // Flush already filtered data from coder->buffer[] to out[].
+ if (coder->pos < coder->filtered) {
+ lzma_bufcpy(coder->buffer, &coder->pos, coder->filtered,
+ out, out_pos, out_size);
+
+ // If we couldn't flush all the filtered data, return to
+ // application immediately.
+ if (coder->pos < coder->filtered)
+ return LZMA_OK;
+
+ if (coder->end_was_reached) {
+ assert(coder->filtered == coder->size);
+ return LZMA_STREAM_END;
+ }
+ }
+
+ // If we get here, there is no filtered data left in the buffer.
+ coder->filtered = 0;
+
+ assert(!coder->end_was_reached);
+
+ // If there is more output space left than there is unfiltered data
+ // in coder->buffer[], flush coder->buffer[] to out[], and copy/code
+ // more data to out[] hopefully filling it completely. Then filter
+ // the data in out[]. This step is where most of the data gets
+ // filtered if the buffer sizes used by the application are reasonable.
+ const size_t out_avail = out_size - *out_pos;
+ const size_t buf_avail = coder->size - coder->pos;
+ if (out_avail > buf_avail || buf_avail == 0) {
+ // Store the old position so that we know from which byte
+ // to start filtering.
+ const size_t out_start = *out_pos;
+
+ // Flush data from coder->buffer[] to out[], but don't reset
+ // coder->pos and coder->size yet. This way the coder can be
+ // restarted if the next filter in the chain returns e.g.
+ // LZMA_MEM_ERROR.
+ //
+ // Do the memcpy() conditionally because out can be NULL
+ // (in which case buf_avail is always 0). Calling memcpy()
+ // with a null-pointer is undefined even if the third
+ // argument is 0.
+ if (buf_avail > 0)
+ memcpy(out + *out_pos, coder->buffer + coder->pos,
+ buf_avail);
+
+ *out_pos += buf_avail;
+
+ // Copy/Encode/Decode more data to out[].
+ {
+ const lzma_ret ret = copy_or_code(coder, allocator,
+ in, in_pos, in_size,
+ out, out_pos, out_size, action);
+ assert(ret != LZMA_STREAM_END);
+ if (ret != LZMA_OK)
+ return ret;
+ }
+
+ // Filter out[] unless there is nothing to filter.
+ // This way we avoid null pointer + 0 (undefined behavior)
+ // when out == NULL.
+ const size_t size = *out_pos - out_start;
+ const size_t filtered = size == 0 ? 0 : call_filter(
+ coder, out + out_start, size);
+
+ const size_t unfiltered = size - filtered;
+ assert(unfiltered <= coder->allocated / 2);
+
+ // Now we can update coder->pos and coder->size, because
+ // the next coder in the chain (if any) was successful.
+ coder->pos = 0;
+ coder->size = unfiltered;
+
+ if (coder->end_was_reached) {
+ // The last byte has been copied to out[] already.
+ // They are left as is.
+ coder->size = 0;
+
+ } else if (unfiltered > 0) {
+ // There is unfiltered data left in out[]. Copy it to
+ // coder->buffer[] and rewind *out_pos appropriately.
+ *out_pos -= unfiltered;
+ memcpy(coder->buffer, out + *out_pos, unfiltered);
+ }
+ } else if (coder->pos > 0) {
+ memmove(coder->buffer, coder->buffer + coder->pos, buf_avail);
+ coder->size -= coder->pos;
+ coder->pos = 0;
+ }
+
+ assert(coder->pos == 0);
+
+ // If coder->buffer[] isn't empty, try to fill it by copying/decoding
+ // more data. Then filter coder->buffer[] and copy the successfully
+ // filtered data to out[]. It is probable, that some filtered and
+ // unfiltered data will be left to coder->buffer[].
+ if (coder->size > 0) {
+ {
+ const lzma_ret ret = copy_or_code(coder, allocator,
+ in, in_pos, in_size,
+ coder->buffer, &coder->size,
+ coder->allocated, action);
+ assert(ret != LZMA_STREAM_END);
+ if (ret != LZMA_OK)
+ return ret;
+ }
+
+ coder->filtered = call_filter(
+ coder, coder->buffer, coder->size);
+
+ // Everything is considered to be filtered if coder->buffer[]
+ // contains the last bytes of the data.
+ if (coder->end_was_reached)
+ coder->filtered = coder->size;
+
+ // Flush as much as possible.
+ lzma_bufcpy(coder->buffer, &coder->pos, coder->filtered,
+ out, out_pos, out_size);
+ }
+
+ // Check if we got everything done.
+ if (coder->end_was_reached && coder->pos == coder->size)
+ return LZMA_STREAM_END;
+
+ return LZMA_OK;
+}
+
+
+static void
+simple_coder_end(void *coder_ptr, const lzma_allocator *allocator)
+{
+ lzma_simple_coder *coder = coder_ptr;
+ lzma_next_end(&coder->next, allocator);
+ lzma_free(coder->simple, allocator);
+ lzma_free(coder, allocator);
+ return;
+}
+
+
+static lzma_ret
+simple_coder_update(void *coder_ptr, const lzma_allocator *allocator,
+ const lzma_filter *filters_null lzma_attribute((__unused__)),
+ const lzma_filter *reversed_filters)
+{
+ lzma_simple_coder *coder = coder_ptr;
+
+ // No update support, just call the next filter in the chain.
+ return lzma_next_filter_update(
+ &coder->next, allocator, reversed_filters + 1);
+}
+
+
+extern lzma_ret
+lzma_simple_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter_info *filters,
+ size_t (*filter)(void *simple, uint32_t now_pos,
+ bool is_encoder, uint8_t *buffer, size_t size),
+ size_t simple_size, size_t unfiltered_max,
+ uint32_t alignment, bool is_encoder)
+{
+ // Allocate memory for the lzma_simple_coder structure if needed.
+ lzma_simple_coder *coder = next->coder;
+ if (coder == NULL) {
+ // Here we allocate space also for the temporary buffer. We
+ // need twice the size of unfiltered_max, because then it
+ // is always possible to filter at least unfiltered_max bytes
+ // more data in coder->buffer[] if it can be filled completely.
+ coder = lzma_alloc(sizeof(lzma_simple_coder)
+ + 2 * unfiltered_max, allocator);
+ if (coder == NULL)
+ return LZMA_MEM_ERROR;
+
+ next->coder = coder;
+ next->code = &simple_code;
+ next->end = &simple_coder_end;
+ next->update = &simple_coder_update;
+
+ coder->next = LZMA_NEXT_CODER_INIT;
+ coder->filter = filter;
+ coder->allocated = 2 * unfiltered_max;
+
+ // Allocate memory for filter-specific data structure.
+ if (simple_size > 0) {
+ coder->simple = lzma_alloc(simple_size, allocator);
+ if (coder->simple == NULL)
+ return LZMA_MEM_ERROR;
+ } else {
+ coder->simple = NULL;
+ }
+ }
+
+ if (filters[0].options != NULL) {
+ const lzma_options_bcj *simple = filters[0].options;
+ coder->now_pos = simple->start_offset;
+ if (coder->now_pos & (alignment - 1))
+ return LZMA_OPTIONS_ERROR;
+ } else {
+ coder->now_pos = 0;
+ }
+
+ // Reset variables.
+ coder->is_encoder = is_encoder;
+ coder->end_was_reached = false;
+ coder->pos = 0;
+ coder->filtered = 0;
+ coder->size = 0;
+
+ return lzma_next_filter_init(&coder->next, allocator, filters + 1);
+}
diff --git a/src/liblzma/simple/simple_coder.h b/src/liblzma/simple/simple_coder.h
new file mode 100644
index 0000000..668a509
--- /dev/null
+++ b/src/liblzma/simple/simple_coder.h
@@ -0,0 +1,81 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file simple_coder.h
+/// \brief Wrapper for simple filters
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_SIMPLE_CODER_H
+#define LZMA_SIMPLE_CODER_H
+
+#include "common.h"
+
+
+extern lzma_ret lzma_simple_x86_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+extern lzma_ret lzma_simple_x86_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+
+extern lzma_ret lzma_simple_powerpc_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+extern lzma_ret lzma_simple_powerpc_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+
+extern lzma_ret lzma_simple_ia64_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+extern lzma_ret lzma_simple_ia64_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+
+extern lzma_ret lzma_simple_arm_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+extern lzma_ret lzma_simple_arm_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+
+extern lzma_ret lzma_simple_armthumb_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+extern lzma_ret lzma_simple_armthumb_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+
+extern lzma_ret lzma_simple_arm64_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+extern lzma_ret lzma_simple_arm64_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+
+extern lzma_ret lzma_simple_sparc_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+extern lzma_ret lzma_simple_sparc_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters);
+
+#endif
diff --git a/src/liblzma/simple/simple_decoder.c b/src/liblzma/simple/simple_decoder.c
new file mode 100644
index 0000000..dc4d241
--- /dev/null
+++ b/src/liblzma/simple/simple_decoder.c
@@ -0,0 +1,40 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file simple_decoder.c
+/// \brief Properties decoder for simple filters
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_decoder.h"
+
+
+extern lzma_ret
+lzma_simple_props_decode(void **options, const lzma_allocator *allocator,
+ const uint8_t *props, size_t props_size)
+{
+ if (props_size == 0)
+ return LZMA_OK;
+
+ if (props_size != 4)
+ return LZMA_OPTIONS_ERROR;
+
+ lzma_options_bcj *opt = lzma_alloc(
+ sizeof(lzma_options_bcj), allocator);
+ if (opt == NULL)
+ return LZMA_MEM_ERROR;
+
+ opt->start_offset = read32le(props);
+
+ // Don't leave an options structure allocated if start_offset is zero.
+ if (opt->start_offset == 0)
+ lzma_free(opt, allocator);
+ else
+ *options = opt;
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/simple/simple_decoder.h b/src/liblzma/simple/simple_decoder.h
new file mode 100644
index 0000000..bed8d37
--- /dev/null
+++ b/src/liblzma/simple/simple_decoder.h
@@ -0,0 +1,22 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file simple_decoder.h
+/// \brief Properties decoder for simple filters
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_SIMPLE_DECODER_H
+#define LZMA_SIMPLE_DECODER_H
+
+#include "simple_coder.h"
+
+extern lzma_ret lzma_simple_props_decode(
+ void **options, const lzma_allocator *allocator,
+ const uint8_t *props, size_t props_size);
+
+#endif
diff --git a/src/liblzma/simple/simple_encoder.c b/src/liblzma/simple/simple_encoder.c
new file mode 100644
index 0000000..d2cc03e
--- /dev/null
+++ b/src/liblzma/simple/simple_encoder.c
@@ -0,0 +1,38 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file simple_encoder.c
+/// \brief Properties encoder for simple filters
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_encoder.h"
+
+
+extern lzma_ret
+lzma_simple_props_size(uint32_t *size, const void *options)
+{
+ const lzma_options_bcj *const opt = options;
+ *size = (opt == NULL || opt->start_offset == 0) ? 0 : 4;
+ return LZMA_OK;
+}
+
+
+extern lzma_ret
+lzma_simple_props_encode(const void *options, uint8_t *out)
+{
+ const lzma_options_bcj *const opt = options;
+
+ // The default start offset is zero, so we don't need to store any
+ // options unless the start offset is non-zero.
+ if (opt == NULL || opt->start_offset == 0)
+ return LZMA_OK;
+
+ write32le(out, opt->start_offset);
+
+ return LZMA_OK;
+}
diff --git a/src/liblzma/simple/simple_encoder.h b/src/liblzma/simple/simple_encoder.h
new file mode 100644
index 0000000..1cee482
--- /dev/null
+++ b/src/liblzma/simple/simple_encoder.h
@@ -0,0 +1,23 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file simple_encoder.c
+/// \brief Properties encoder for simple filters
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_SIMPLE_ENCODER_H
+#define LZMA_SIMPLE_ENCODER_H
+
+#include "simple_coder.h"
+
+
+extern lzma_ret lzma_simple_props_size(uint32_t *size, const void *options);
+
+extern lzma_ret lzma_simple_props_encode(const void *options, uint8_t *out);
+
+#endif
diff --git a/src/liblzma/simple/simple_private.h b/src/liblzma/simple/simple_private.h
new file mode 100644
index 0000000..9d2c0fd
--- /dev/null
+++ b/src/liblzma/simple/simple_private.h
@@ -0,0 +1,74 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file simple_private.h
+/// \brief Private definitions for so called simple filters
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef LZMA_SIMPLE_PRIVATE_H
+#define LZMA_SIMPLE_PRIVATE_H
+
+#include "simple_coder.h"
+
+
+typedef struct {
+ /// Next filter in the chain
+ lzma_next_coder next;
+
+ /// True if the next coder in the chain has returned LZMA_STREAM_END.
+ bool end_was_reached;
+
+ /// True if filter() should encode the data; false to decode.
+ /// Currently all simple filters use the same function for encoding
+ /// and decoding, because the difference between encoders and decoders
+ /// is very small.
+ bool is_encoder;
+
+ /// Pointer to filter-specific function, which does
+ /// the actual filtering.
+ size_t (*filter)(void *simple, uint32_t now_pos,
+ bool is_encoder, uint8_t *buffer, size_t size);
+
+ /// Pointer to filter-specific data, or NULL if filter doesn't need
+ /// any extra data.
+ void *simple;
+
+ /// The lowest 32 bits of the current position in the data. Most
+ /// filters need this to do conversions between absolute and relative
+ /// addresses.
+ uint32_t now_pos;
+
+ /// Size of the memory allocated for the buffer.
+ size_t allocated;
+
+ /// Flushing position in the temporary buffer. buffer[pos] is the
+ /// next byte to be copied to out[].
+ size_t pos;
+
+ /// buffer[filtered] is the first unfiltered byte. When pos is smaller
+ /// than filtered, there is unflushed filtered data in the buffer.
+ size_t filtered;
+
+ /// Total number of bytes (both filtered and unfiltered) currently
+ /// in the temporary buffer.
+ size_t size;
+
+ /// Temporary buffer
+ uint8_t buffer[];
+} lzma_simple_coder;
+
+
+extern lzma_ret lzma_simple_coder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters,
+ size_t (*filter)(void *simple, uint32_t now_pos,
+ bool is_encoder, uint8_t *buffer, size_t size),
+ size_t simple_size, size_t unfiltered_max,
+ uint32_t alignment, bool is_encoder);
+
+#endif
diff --git a/src/liblzma/simple/sparc.c b/src/liblzma/simple/sparc.c
new file mode 100644
index 0000000..bad8492
--- /dev/null
+++ b/src/liblzma/simple/sparc.c
@@ -0,0 +1,87 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file sparc.c
+/// \brief Filter for SPARC binaries
+///
+// Authors: Igor Pavlov
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_private.h"
+
+
+static size_t
+sparc_code(void *simple lzma_attribute((__unused__)),
+ uint32_t now_pos, bool is_encoder,
+ uint8_t *buffer, size_t size)
+{
+ size_t i;
+ for (i = 0; i + 4 <= size; i += 4) {
+
+ if ((buffer[i] == 0x40 && (buffer[i + 1] & 0xC0) == 0x00)
+ || (buffer[i] == 0x7F
+ && (buffer[i + 1] & 0xC0) == 0xC0)) {
+
+ uint32_t src = ((uint32_t)buffer[i + 0] << 24)
+ | ((uint32_t)buffer[i + 1] << 16)
+ | ((uint32_t)buffer[i + 2] << 8)
+ | ((uint32_t)buffer[i + 3]);
+
+ src <<= 2;
+
+ uint32_t dest;
+ if (is_encoder)
+ dest = now_pos + (uint32_t)(i) + src;
+ else
+ dest = src - (now_pos + (uint32_t)(i));
+
+ dest >>= 2;
+
+ dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF)
+ | (dest & 0x3FFFFF)
+ | 0x40000000;
+
+ buffer[i + 0] = (uint8_t)(dest >> 24);
+ buffer[i + 1] = (uint8_t)(dest >> 16);
+ buffer[i + 2] = (uint8_t)(dest >> 8);
+ buffer[i + 3] = (uint8_t)(dest);
+ }
+ }
+
+ return i;
+}
+
+
+static lzma_ret
+sparc_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter_info *filters, bool is_encoder)
+{
+ return lzma_simple_coder_init(next, allocator, filters,
+ &sparc_code, 0, 4, 4, is_encoder);
+}
+
+
+#ifdef HAVE_ENCODER_SPARC
+extern lzma_ret
+lzma_simple_sparc_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return sparc_coder_init(next, allocator, filters, true);
+}
+#endif
+
+
+#ifdef HAVE_DECODER_SPARC
+extern lzma_ret
+lzma_simple_sparc_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return sparc_coder_init(next, allocator, filters, false);
+}
+#endif
diff --git a/src/liblzma/simple/x86.c b/src/liblzma/simple/x86.c
new file mode 100644
index 0000000..232b295
--- /dev/null
+++ b/src/liblzma/simple/x86.c
@@ -0,0 +1,163 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file x86.c
+/// \brief Filter for x86 binaries (BCJ filter)
+///
+// Authors: Igor Pavlov
+// Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "simple_private.h"
+
+
+#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF)
+
+
+typedef struct {
+ uint32_t prev_mask;
+ uint32_t prev_pos;
+} lzma_simple_x86;
+
+
+static size_t
+x86_code(void *simple_ptr, uint32_t now_pos, bool is_encoder,
+ uint8_t *buffer, size_t size)
+{
+ static const bool MASK_TO_ALLOWED_STATUS[8]
+ = { true, true, true, false, true, false, false, false };
+
+ static const uint32_t MASK_TO_BIT_NUMBER[8]
+ = { 0, 1, 2, 2, 3, 3, 3, 3 };
+
+ lzma_simple_x86 *simple = simple_ptr;
+ uint32_t prev_mask = simple->prev_mask;
+ uint32_t prev_pos = simple->prev_pos;
+
+ if (size < 5)
+ return 0;
+
+ if (now_pos - prev_pos > 5)
+ prev_pos = now_pos - 5;
+
+ const size_t limit = size - 5;
+ size_t buffer_pos = 0;
+
+ while (buffer_pos <= limit) {
+ uint8_t b = buffer[buffer_pos];
+ if (b != 0xE8 && b != 0xE9) {
+ ++buffer_pos;
+ continue;
+ }
+
+ const uint32_t offset = now_pos + (uint32_t)(buffer_pos)
+ - prev_pos;
+ prev_pos = now_pos + (uint32_t)(buffer_pos);
+
+ if (offset > 5) {
+ prev_mask = 0;
+ } else {
+ for (uint32_t i = 0; i < offset; ++i) {
+ prev_mask &= 0x77;
+ prev_mask <<= 1;
+ }
+ }
+
+ b = buffer[buffer_pos + 4];
+
+ if (Test86MSByte(b)
+ && MASK_TO_ALLOWED_STATUS[(prev_mask >> 1) & 0x7]
+ && (prev_mask >> 1) < 0x10) {
+
+ uint32_t src = ((uint32_t)(b) << 24)
+ | ((uint32_t)(buffer[buffer_pos + 3]) << 16)
+ | ((uint32_t)(buffer[buffer_pos + 2]) << 8)
+ | (buffer[buffer_pos + 1]);
+
+ uint32_t dest;
+ while (true) {
+ if (is_encoder)
+ dest = src + (now_pos + (uint32_t)(
+ buffer_pos) + 5);
+ else
+ dest = src - (now_pos + (uint32_t)(
+ buffer_pos) + 5);
+
+ if (prev_mask == 0)
+ break;
+
+ const uint32_t i = MASK_TO_BIT_NUMBER[
+ prev_mask >> 1];
+
+ b = (uint8_t)(dest >> (24 - i * 8));
+
+ if (!Test86MSByte(b))
+ break;
+
+ src = dest ^ ((1U << (32 - i * 8)) - 1);
+ }
+
+ buffer[buffer_pos + 4]
+ = (uint8_t)(~(((dest >> 24) & 1) - 1));
+ buffer[buffer_pos + 3] = (uint8_t)(dest >> 16);
+ buffer[buffer_pos + 2] = (uint8_t)(dest >> 8);
+ buffer[buffer_pos + 1] = (uint8_t)(dest);
+ buffer_pos += 5;
+ prev_mask = 0;
+
+ } else {
+ ++buffer_pos;
+ prev_mask |= 1;
+ if (Test86MSByte(b))
+ prev_mask |= 0x10;
+ }
+ }
+
+ simple->prev_mask = prev_mask;
+ simple->prev_pos = prev_pos;
+
+ return buffer_pos;
+}
+
+
+static lzma_ret
+x86_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,
+ const lzma_filter_info *filters, bool is_encoder)
+{
+ const lzma_ret ret = lzma_simple_coder_init(next, allocator, filters,
+ &x86_code, sizeof(lzma_simple_x86), 5, 1, is_encoder);
+
+ if (ret == LZMA_OK) {
+ lzma_simple_coder *coder = next->coder;
+ lzma_simple_x86 *simple = coder->simple;
+ simple->prev_mask = 0;
+ simple->prev_pos = (uint32_t)(-5);
+ }
+
+ return ret;
+}
+
+
+#ifdef HAVE_ENCODER_X86
+extern lzma_ret
+lzma_simple_x86_encoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return x86_coder_init(next, allocator, filters, true);
+}
+#endif
+
+
+#ifdef HAVE_DECODER_X86
+extern lzma_ret
+lzma_simple_x86_decoder_init(lzma_next_coder *next,
+ const lzma_allocator *allocator,
+ const lzma_filter_info *filters)
+{
+ return x86_coder_init(next, allocator, filters, false);
+}
+#endif