Adding upstream version 5.4.1.upstream/5.4.1 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 21:12:04 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 21:12:04 +0000
commit: eac54b7c4aec25060d7bd856f7cdc290943d6aae (patch)
tree: 9a6d81c9f88df4698e746d63d14ddafeddd918b8 /doc
parent: Initial commit. (diff)
download: xz-utils-eac54b7c4aec25060d7bd856f7cdc290943d6aae.tar.xz
xz-utils-eac54b7c4aec25060d7bd856f7cdc290943d6aae.zip
33 files changed, 4889 insertions, 0 deletions
diff --git a/doc/examples/00_README.txt b/doc/examples/00_README.txt
new file mode 100644
index 0000000..120e1eb
--- /dev/null
+++ b/doc/examples/00_README.txt
@@ -0,0 +1,31 @@
+
+liblzma example programs
+========================
+
+Introduction
+
+    The examples are written so that the same comments aren't
+    repeated (much) in later files.
+
+    On POSIX systems, the examples should build by just typing "make".
+
+    The examples that use stdin or stdout don't set stdin and stdout
+    to binary mode. On systems where it matters (e.g. Windows) it is
+    possible that the examples won't work without modification.
+
+
+List of examples
+
+    01_compress_easy.c                  Multi-call compression using
+                                        a compression preset
+
+    02_decompress.c                     Multi-call decompression
+
+    03_compress_custom.c                Like 01_compress_easy.c but using
+                                        a custom filter chain
+                                        (x86 BCJ + LZMA2)
+
+    04_compress_easy_mt.c               Multi-threaded multi-call
+                                        compression using a compression
+                                        preset
+
diff --git a/doc/examples/01_compress_easy.c b/doc/examples/01_compress_easy.c
new file mode 100644
index 0000000..ec32a37
--- /dev/null
+++ b/doc/examples/01_compress_easy.c
@@ -0,0 +1,297 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       01_compress_easy.c
+/// \brief      Compress from stdin to stdout in multi-call mode
+///
+/// Usage:      ./01_compress_easy PRESET < INFILE > OUTFILE
+///
+/// Example:    ./01_compress_easy 6 < foo > foo.xz
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <lzma.h>
+
+
+static void
+show_usage_and_exit(const char *argv0)
+{
+	// Explain the expected command line on stderr, then terminate.
+	fprintf(stderr, "Usage: %s PRESET < INFILE > OUTFILE\nPRESET is "
+			"a number 0-9 and can optionally be followed by "
+			"`e' to indicate extreme preset\n", argv0);
+	exit(EXIT_FAILURE);
+}
+
+
+static uint32_t
+get_preset(int argc, char **argv)
+{
+	// Exactly one argument is accepted and it must begin with a digit.
+	if (argc != 2 || argv[1][0] < '0' || argv[1][0] > '9')
+		show_usage_and_exit(argv[0]);
+
+	// The leading digit is the preset level 0-9.
+	const char *arg = argv[1];
+	uint32_t level = arg[0] - '0';
+
+	// Nothing after the digit means a plain preset.
+	if (arg[1] == '\0')
+		return level;
+
+	// Otherwise the only valid suffix is `e' (LZMA_PRESET_EXTREME).
+	if (arg[1] != 'e' || arg[2] != '\0')
+		show_usage_and_exit(argv[0]);
+
+	return level | LZMA_PRESET_EXTREME;
+}
+
+
+static bool
+init_encoder(lzma_stream *strm, uint32_t preset)
+{
+	// Initialize the encoder using a preset. Set the integrity check
+	// to CRC64, which is the default in the xz command line tool. If
+	// the .xz file needs to be decompressed with XZ Embedded, use
+	// LZMA_CHECK_CRC32 instead.
+	lzma_ret ret = lzma_easy_encoder(strm, preset, LZMA_CHECK_CRC64);
+
+	// Return successfully if the initialization went fine.
+	if (ret == LZMA_OK)
+		return true;
+
+	// Something went wrong. The possible errors are documented in
+	// lzma/container.h (src/liblzma/api/lzma/container.h in the source
+	// package or e.g. /usr/include/lzma/container.h depending on the
+	// install prefix).
+	const char *msg;
+	switch (ret) {
+	case LZMA_MEM_ERROR:
+		msg = "Memory allocation failed";
+		break;
+
+	case LZMA_OPTIONS_ERROR:
+		msg = "Specified preset is not supported";
+		break;
+
+	case LZMA_UNSUPPORTED_CHECK:
+		msg = "Specified integrity check is not supported";
+		break;
+
+	default:
+		// This is most likely LZMA_PROG_ERROR indicating a bug in
+		// this program or in liblzma. It is inconvenient to have a
+		// separate error message for errors that should be impossible
+		// to occur, but knowing the error code is important for
+		// debugging. That's why it is good to print the error code
+		// at least when there is no good error message to show.
+		msg = "Unknown error, possibly a bug";
+		break;
+	}
+
+	fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
+			msg, ret);
+	return false;
+}
+
+
+static bool
+compress(lzma_stream *strm, FILE *infile, FILE *outfile)
+{
+	// This will be LZMA_RUN until the end of the input file is reached.
+	// This tells lzma_code() when there will be no more input.
+	lzma_action action = LZMA_RUN;
+
+	// Buffers to temporarily hold uncompressed input
+	// and compressed output.
+	uint8_t inbuf[BUFSIZ];
+	uint8_t outbuf[BUFSIZ];
+
+	// Initialize the input and output pointers. Initializing next_in
+	// and avail_in isn't really necessary when we are going to encode
+	// just one file since LZMA_STREAM_INIT takes care of initializing
+	// those already. But it doesn't hurt much and it will be needed
+	// if handling more than one file like we will in 02_decompress.c.
+	//
+	// While we don't care about strm->total_in or strm->total_out in this
+	// example, it is worth noting that initializing the encoder will
+	// always reset total_in and total_out to zero. But the encoder
+	// initialization doesn't touch next_in, avail_in, next_out, or
+	// avail_out.
+	strm->next_in = NULL;
+	strm->avail_in = 0;
+	strm->next_out = outbuf;
+	strm->avail_out = sizeof(outbuf);
+
+	// Loop until the file has been successfully compressed or until
+	// an error occurs.
+	while (true) {
+		// Fill the input buffer if it is empty.
+		if (strm->avail_in == 0 && !feof(infile)) {
+			strm->next_in = inbuf;
+			strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
+					infile);
+
+			if (ferror(infile)) {
+				fprintf(stderr, "Read error: %s\n",
+						strerror(errno));
+				return false;
+			}
+
+			// Once the end of the input file has been reached,
+			// we need to tell lzma_code() that no more input
+			// will be coming and that it should finish the
+			// encoding.
+			if (feof(infile))
+				action = LZMA_FINISH;
+		}
+
+		// Tell liblzma to do the actual encoding.
+		//
+		// This reads up to strm->avail_in bytes of input starting
+		// from strm->next_in. avail_in will be decremented and
+		// next_in incremented by an equal amount to match the
+		// number of input bytes consumed.
+		//
+		// Up to strm->avail_out bytes of compressed output will be
+		// written starting from strm->next_out. avail_out will be
+		// decremented and next_out incremented by an equal amount
+		// to match the number of output bytes written.
+		//
+		// The encoder has to do internal buffering, which means that
+		// it may take quite a bit of input before the same data is
+		// available in compressed form in the output buffer.
+		lzma_ret ret = lzma_code(strm, action);
+
+		// If the output buffer is full or if the compression finished
+		// successfully, write the data from the output buffer to
+		// the output file.
+		if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
+			// When lzma_code() has returned LZMA_STREAM_END,
+			// the output buffer is likely to be only partially
+			// full. Calculate how much new data there is to
+			// be written to the output file.
+			size_t write_size = sizeof(outbuf) - strm->avail_out;
+
+			if (fwrite(outbuf, 1, write_size, outfile)
+					!= write_size) {
+				fprintf(stderr, "Write error: %s\n",
+						strerror(errno));
+				return false;
+			}
+
+			// Reset next_out and avail_out.
+			strm->next_out = outbuf;
+			strm->avail_out = sizeof(outbuf);
+		}
+
+		// Normally the return value of lzma_code() will be LZMA_OK
+		// until everything has been encoded.
+		if (ret != LZMA_OK) {
+			// Once everything has been encoded successfully, the
+			// return value of lzma_code() will be LZMA_STREAM_END.
+			//
+			// It is important to check for LZMA_STREAM_END. Do not
+			// assume that getting ret != LZMA_OK would mean that
+			// everything has gone well.
+			if (ret == LZMA_STREAM_END)
+				return true;
+
+			// It's not LZMA_OK nor LZMA_STREAM_END,
+			// so it must be an error code. See lzma/base.h
+			// (src/liblzma/api/lzma/base.h in the source package
+			// or e.g. /usr/include/lzma/base.h depending on the
+			// install prefix) for the list and documentation of
+			// possible values. Most values listed in lzma_ret
+			// enumeration aren't possible in this example.
+			const char *msg;
+			switch (ret) {
+			case LZMA_MEM_ERROR:
+				msg = "Memory allocation failed";
+				break;
+
+			case LZMA_DATA_ERROR:
+				// This error is returned if the compressed
+				// or uncompressed size get near 8 EiB
+				// (2^63 bytes) because that's where the .xz
+				// file format size limits currently are.
+				// That is, the possibility of this error
+				// is mostly theoretical unless you are doing
+				// something very unusual.
+				//
+				// Note that strm->total_in and strm->total_out
+				// have nothing to do with this error. Changing
+				// those variables won't increase or decrease
+				// the chance of getting this error.
+				msg = "File size limits exceeded";
+				break;
+
+			default:
+				// This is most likely LZMA_PROG_ERROR, but
+				// if this program is buggy (or liblzma has
+				// a bug), it may be e.g. LZMA_BUF_ERROR or
+				// LZMA_OPTIONS_ERROR too.
+				//
+				// It is inconvenient to have a separate
+				// error message for errors that should be
+				// impossible to occur, but knowing the error
+				// code is important for debugging. That's why
+				// it is good to print the error code at least
+				// when there is no good error message to show.
+				msg = "Unknown error, possibly a bug";
+				break;
+			}
+
+			fprintf(stderr, "Encoder error: %s (error code %u)\n",
+					msg, ret);
+			return false;
+		}
+	}
+}
+
+
+extern int
+main(int argc, char **argv)
+{
+	// Parse the compression preset given on the command line.
+	const uint32_t preset = get_preset(argc, argv);
+
+	// A stack-allocated lzma_stream is most easily set up with the
+	// LZMA_STREAM_INIT macro. A heap-allocated one can be cleared with
+	// memset(strmptr, 0, sizeof(*strmptr)), which works as long as
+	// NULL pointers are all-bits-zero (true on practically every
+	// computer today).
+	lzma_stream stream = LZMA_STREAM_INIT;
+
+	// Compress stdin to stdout, but only when the encoder could be
+	// initialized; && skips compress() after a failed init_encoder().
+	bool ok = init_encoder(&stream, preset)
+			&& compress(&stream, stdin, stdout);
+
+	// Release the encoder's memory. With several input files this
+	// would be needed only once, after the last file; 02_decompress.c
+	// shows how multiple files are handled.
+	//
+	// Calling lzma_end() repeatedly, or on a stream that was merely
+	// initialized with LZMA_STREAM_INIT, is fine.
+	lzma_end(&stream);
+
+	// Closing stdout flushes whatever stdio still has buffered, so
+	// this is where write errors that were deferred until the flush
+	// finally show up.
+	if (fclose(stdout) != 0) {
+		fprintf(stderr, "Write error: %s\n", strerror(errno));
+		ok = false;
+	}
+
+	return ok ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/doc/examples/02_decompress.c b/doc/examples/02_decompress.c
new file mode 100644
index 0000000..98339be
--- /dev/null
+++ b/doc/examples/02_decompress.c
@@ -0,0 +1,287 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       02_decompress.c
+/// \brief      Decompress .xz files to stdout
+///
+/// Usage:      ./02_decompress INPUT_FILES... > OUTFILE
+///
+/// Example:    ./02_decompress foo.xz bar.xz > foobar
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <lzma.h>
+
+
+static bool
+init_decoder(lzma_stream *strm)
+{
+	// Initialize a .xz decoder. The decoder supports a memory usage limit
+	// and a set of flags.
+	//
+	// The memory usage of the decompressor depends on the settings used
+	// to compress a .xz file. It can vary from less than a megabyte to
+	// a few gigabytes, but in practice (at least for now) it rarely
+	// exceeds 65 MiB because that's how much memory is required to
+	// decompress files created with "xz -9". Settings requiring more
+	// memory take extra effort to use and don't (at least for now)
+	// provide significantly better compression in most cases.
+	//
+	// A memory usage limit is useful if it is important that the
+	// decompressor won't consume gigabytes of memory. The need
+	// for limiting depends on the application. In this example,
+	// no memory usage limiting is used. This is done by setting
+	// the limit to UINT64_MAX.
+	//
+	// The .xz format allows concatenating compressed files as is:
+	//
+	//     echo foo | xz > foobar.xz
+	//     echo bar | xz >> foobar.xz
+	//
+	// When decompressing normal standalone .xz files, LZMA_CONCATENATED
+	// should always be used to support decompression of concatenated
+	// .xz files. If LZMA_CONCATENATED isn't used, the decoder will stop
+	// after the first .xz stream. This can be useful when .xz data has
+	// been embedded inside another file format.
+	//
+	// Flags other than LZMA_CONCATENATED are supported too, and can
+	// be combined with bitwise-or. See lzma/container.h
+	// (src/liblzma/api/lzma/container.h in the source package or e.g.
+	// /usr/include/lzma/container.h depending on the install prefix)
+	// for details.
+	lzma_ret ret = lzma_stream_decoder(
+			strm, UINT64_MAX, LZMA_CONCATENATED);
+
+	// Return successfully if the initialization went fine.
+	if (ret == LZMA_OK)
+		return true;
+
+	// Something went wrong. The possible errors are documented in
+	// lzma/container.h (src/liblzma/api/lzma/container.h in the source
+	// package or e.g. /usr/include/lzma/container.h depending on the
+	// install prefix).
+	//
+	// Note that LZMA_MEMLIMIT_ERROR is never possible here. If you
+	// specify a very tiny limit, the error will be delayed until
+	// the first headers have been parsed by a call to lzma_code().
+	const char *msg;
+	switch (ret) {
+	case LZMA_MEM_ERROR:
+		msg = "Memory allocation failed";
+		break;
+
+	case LZMA_OPTIONS_ERROR:
+		msg = "Unsupported decompressor flags";
+		break;
+
+	default:
+		// This is most likely LZMA_PROG_ERROR indicating a bug in
+		// this program or in liblzma. It is inconvenient to have a
+		// separate error message for errors that should be impossible
+		// to occur, but knowing the error code is important for
+		// debugging. That's why it is good to print the error code
+		// at least when there is no good error message to show.
+		msg = "Unknown error, possibly a bug";
+		break;
+	}
+
+	fprintf(stderr, "Error initializing the decoder: %s (error code %u)\n",
+			msg, ret);
+	return false;
+}
+
+
+static bool
+decompress(lzma_stream *strm, const char *inname, FILE *infile, FILE *outfile)
+{
+	// When LZMA_CONCATENATED flag was used when initializing the decoder,
+	// we need to tell lzma_code() when there will be no more input.
+	// This is done by setting action to LZMA_FINISH instead of LZMA_RUN
+	// in the same way as it is done when encoding.
+	//
+	// When LZMA_CONCATENATED isn't used, there is no need to use
+	// LZMA_FINISH to tell when all the input has been read, but it
+	// is still OK to use it if you want. When LZMA_CONCATENATED isn't
+	// used, the decoder will stop after the first .xz stream. In that
+	// case some unused data may be left in strm->next_in.
+	lzma_action action = LZMA_RUN;
+
+	uint8_t inbuf[BUFSIZ];
+	uint8_t outbuf[BUFSIZ];
+
+	strm->next_in = NULL;
+	strm->avail_in = 0;
+	strm->next_out = outbuf;
+	strm->avail_out = sizeof(outbuf);
+
+	while (true) {
+		if (strm->avail_in == 0 && !feof(infile)) {
+			strm->next_in = inbuf;
+			strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
+					infile);
+
+			if (ferror(infile)) {
+				fprintf(stderr, "%s: Read error: %s\n",
+						inname, strerror(errno));
+				return false;
+			}
+
+			// Once the end of the input file has been reached,
+			// we need to tell lzma_code() that no more input
+			// will be coming. As said before, this isn't required
+			// if the LZMA_CONCATENATED flag isn't used when
+			// initializing the decoder.
+			if (feof(infile))
+				action = LZMA_FINISH;
+		}
+
+		lzma_ret ret = lzma_code(strm, action);
+
+		if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
+			size_t write_size = sizeof(outbuf) - strm->avail_out;
+
+			if (fwrite(outbuf, 1, write_size, outfile)
+					!= write_size) {
+				fprintf(stderr, "Write error: %s\n",
+						strerror(errno));
+				return false;
+			}
+
+			strm->next_out = outbuf;
+			strm->avail_out = sizeof(outbuf);
+		}
+
+		if (ret != LZMA_OK) {
+			// Once everything has been decoded successfully, the
+			// return value of lzma_code() will be LZMA_STREAM_END.
+			//
+			// It is important to check for LZMA_STREAM_END. Do not
+			// assume that getting ret != LZMA_OK would mean that
+			// everything has gone well or that when you aren't
+			// getting more output it must have successfully
+			// decoded everything.
+			if (ret == LZMA_STREAM_END)
+				return true;
+
+			// It's not LZMA_OK nor LZMA_STREAM_END,
+			// so it must be an error code. See lzma/base.h
+			// (src/liblzma/api/lzma/base.h in the source package
+			// or e.g. /usr/include/lzma/base.h depending on the
+			// install prefix) for the list and documentation of
+			// possible values. Many values listed in lzma_ret
+			// enumeration aren't possible in this example, but
+			// can be made possible by enabling memory usage limit
+			// or adding flags to the decoder initialization.
+			const char *msg;
+			switch (ret) {
+			case LZMA_MEM_ERROR:
+				msg = "Memory allocation failed";
+				break;
+
+			case LZMA_FORMAT_ERROR:
+				// .xz magic bytes weren't found.
+				msg = "The input is not in the .xz format";
+				break;
+
+			case LZMA_OPTIONS_ERROR:
+				// For example, the headers specify a filter
+				// that isn't supported by this liblzma
+				// version (or it hasn't been enabled when
+				// building liblzma, but no-one sane does
+				// that unless building liblzma for an
+				// embedded system). Upgrading to a newer
+				// liblzma might help.
+				//
+				// Note that it is unlikely that the file has
+				// accidentally become corrupt if you get this
+				// error. The integrity of the .xz headers is
+				// always verified with a CRC32, so
+				// unintentionally corrupt files can be
+				// distinguished from unsupported files.
+				msg = "Unsupported compression options";
+				break;
+
+			case LZMA_DATA_ERROR:
+				msg = "Compressed file is corrupt";
+				break;
+
+			case LZMA_BUF_ERROR:
+				// Typically this error means that a valid
+				// file has got truncated, but it might also
+				// be a damaged part in the file that makes
+				// the decoder think the file is truncated.
+				// If you prefer, you can use the same error
+				// message for this as for LZMA_DATA_ERROR.
+				msg = "Compressed file is truncated or "
+						"otherwise corrupt";
+				break;
+
+			default:
+				// This is most likely LZMA_PROG_ERROR.
+				msg = "Unknown error, possibly a bug";
+				break;
+			}
+
+			fprintf(stderr, "%s: Decoder error: "
+					"%s (error code %u)\n",
+					inname, msg, ret);
+			return false;
+		}
+	}
+}
+
+
+extern int
+main(int argc, char **argv)
+{
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s FILES...\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	lzma_stream stream = LZMA_STREAM_INIT;
+	bool ok = true;
+
+	// Attempt every named file, even if an earlier one failed.
+	for (int idx = 1; idx < argc; ++idx) {
+		const char *name = argv[idx];
+
+		// The decoder is (re)initialized for each file. If even that
+		// fails, retrying is pointless, so stop here.
+		if (!init_decoder(&stream)) {
+			ok = false;
+			break;
+		}
+
+		FILE *in = fopen(name, "rb");
+		if (in == NULL) {
+			fprintf(stderr, "%s: Error opening the input "
+					"file: %s\n", name, strerror(errno));
+			ok = false;
+			continue;
+		}
+
+		if (!decompress(&stream, name, in, stdout))
+			ok = false;
+		fclose(in);
+	}
+
+	// One lzma_end() after the last file frees the decoder's memory.
+	lzma_end(&stream);
+
+	if (fclose(stdout) != 0) {
+		fprintf(stderr, "Write error: %s\n", strerror(errno));
+		ok = false;
+	}
+
+	return ok ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/doc/examples/03_compress_custom.c b/doc/examples/03_compress_custom.c
new file mode 100644
index 0000000..40c85e3
--- /dev/null
+++ b/doc/examples/03_compress_custom.c
@@ -0,0 +1,193 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       03_compress_custom.c
+/// \brief      Compress in multi-call mode using x86 BCJ and LZMA2
+///
+/// Usage:      ./03_compress_custom < INFILE > OUTFILE
+///
+/// Example:    ./03_compress_custom < foo > foo.xz
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <lzma.h>
+
+
+static bool
+init_encoder(lzma_stream *strm)
+{
+	// Use the default preset (6) for LZMA2.
+	//
+	// The lzma_options_lzma structure and the lzma_lzma_preset() function
+	// are declared in lzma/lzma12.h (src/liblzma/api/lzma/lzma12.h in the
+	// source package or e.g. /usr/include/lzma/lzma12.h depending on
+	// the install prefix).
+	lzma_options_lzma opt_lzma2;
+	if (lzma_lzma_preset(&opt_lzma2, LZMA_PRESET_DEFAULT)) {
+		// It should never fail because the default preset
+		// (and presets 0-9 optionally with LZMA_PRESET_EXTREME)
+		// are supported by all stable liblzma versions.
+		//
+		// (The encoder initialization later in this function may
+		// still fail due to unsupported preset *if* the features
+		// required by the preset have been disabled at build time,
+		// but no-one does such things except on embedded systems.)
+		fprintf(stderr, "Unsupported preset, possibly a bug\n");
+		return false;
+	}
+
+	// Now we could customize the LZMA2 options if we wanted. For example,
+	// we could set the dictionary size (opt_lzma2.dict_size) to
+	// something else than the default (8 MiB) of the default preset.
+	// See lzma/lzma12.h for details of all LZMA2 options.
+	//
+	// The x86 BCJ filter will try to modify the x86 instruction stream so
+	// that LZMA2 can compress it better. The x86 BCJ filter doesn't need
+	// any options so it will be set to NULL below.
+	//
+	// Construct the filter chain. The uncompressed data goes first to
+	// the first filter in the array, in this case the x86 BCJ filter.
+	// The array is always terminated by setting .id = LZMA_VLI_UNKNOWN.
+	//
+	// See lzma/filter.h for more information about the lzma_filter
+	// structure.
+	lzma_filter filters[] = {
+		{ .id = LZMA_FILTER_X86, .options = NULL },
+		{ .id = LZMA_FILTER_LZMA2, .options = &opt_lzma2 },
+		{ .id = LZMA_VLI_UNKNOWN, .options = NULL },
+	};
+
+	// Initialize the encoder using the custom filter chain.
+	lzma_ret ret = lzma_stream_encoder(strm, filters, LZMA_CHECK_CRC64);
+
+	if (ret == LZMA_OK)
+		return true;
+
+	const char *msg;
+	switch (ret) {
+	case LZMA_MEM_ERROR:
+		msg = "Memory allocation failed";
+		break;
+
+	case LZMA_OPTIONS_ERROR:
+		// We are no longer using a plain preset so this error
+		// message has been edited accordingly compared to
+		// 01_compress_easy.c.
+		msg = "Specified filter chain is not supported";
+		break;
+
+	case LZMA_UNSUPPORTED_CHECK:
+		msg = "Specified integrity check is not supported";
+		break;
+
+	default:
+		msg = "Unknown error, possibly a bug";
+		break;
+	}
+
+	fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
+			msg, ret);
+	return false;
+}
+
+
+// Identical to compress() in 01_compress_easy.c; see that file for comments.
+static bool
+compress(lzma_stream *strm, FILE *infile, FILE *outfile)
+{
+	lzma_action action = LZMA_RUN;
+
+	uint8_t inbuf[BUFSIZ];
+	uint8_t outbuf[BUFSIZ];
+
+	strm->next_in = NULL;
+	strm->avail_in = 0;
+	strm->next_out = outbuf;
+	strm->avail_out = sizeof(outbuf);
+
+	while (true) {
+		if (strm->avail_in == 0 && !feof(infile)) {
+			strm->next_in = inbuf;
+			strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
+					infile);
+
+			if (ferror(infile)) {
+				fprintf(stderr, "Read error: %s\n",
+						strerror(errno));
+				return false;
+			}
+
+			if (feof(infile))
+				action = LZMA_FINISH;
+		}
+
+		lzma_ret ret = lzma_code(strm, action);
+
+		if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
+			size_t write_size = sizeof(outbuf) - strm->avail_out;
+
+			if (fwrite(outbuf, 1, write_size, outfile)
+					!= write_size) {
+				fprintf(stderr, "Write error: %s\n",
+						strerror(errno));
+				return false;
+			}
+
+			strm->next_out = outbuf;
+			strm->avail_out = sizeof(outbuf);
+		}
+
+		if (ret != LZMA_OK) {
+			if (ret == LZMA_STREAM_END)
+				return true;
+
+			const char *msg;
+			switch (ret) {
+			case LZMA_MEM_ERROR:
+				msg = "Memory allocation failed";
+				break;
+
+			case LZMA_DATA_ERROR:
+				msg = "File size limits exceeded";
+				break;
+
+			default:
+				msg = "Unknown error, possibly a bug";
+				break;
+			}
+
+			fprintf(stderr, "Encoder error: %s (error code %u)\n",
+					msg, ret);
+			return false;
+		}
+	}
+}
+
+
+extern int
+main(void)
+{
+	lzma_stream stream = LZMA_STREAM_INIT;
+
+	// compress() runs only if the encoder initialization succeeded.
+	bool ok = init_encoder(&stream) && compress(&stream, stdin, stdout);
+
+	lzma_end(&stream);
+
+	// A failed fclose() means buffered output could not be written.
+	if (fclose(stdout) != 0) {
+		fprintf(stderr, "Write error: %s\n", strerror(errno));
+		ok = false;
+	}
+
+	return ok ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/doc/examples/04_compress_easy_mt.c b/doc/examples/04_compress_easy_mt.c
new file mode 100644
index 0000000..efe5697
--- /dev/null
+++ b/doc/examples/04_compress_easy_mt.c
@@ -0,0 +1,206 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       04_compress_easy_mt.c
+/// \brief      Compress in multi-call mode using LZMA2 in multi-threaded mode
+///
+/// Usage:      ./04_compress_easy_mt < INFILE > OUTFILE
+///
+/// Example:    ./04_compress_easy_mt < foo > foo.xz
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <lzma.h>
+
+
+static bool
+init_encoder(lzma_stream *strm)
+{
+	// The threaded encoder takes the options as a pointer to
+	// a lzma_mt structure.
+	lzma_mt mt = {
+		// No flags are needed.
+		.flags = 0,
+
+		// Let liblzma determine a sane block size.
+		.block_size = 0,
+
+		// Use no timeout for lzma_code() calls by setting timeout
+		// to zero. That is, sometimes lzma_code() might block for
+		// a long time (from several seconds to even minutes).
+		// If this is not OK, for example due to progress indicator
+		// needing updates, specify a timeout in milliseconds here.
+		// See the documentation of lzma_mt in lzma/container.h for
+		// information on how to choose a reasonable timeout.
+		.timeout = 0,
+
+		// Use the default preset (6) for LZMA2.
+		// To use a preset, filters must be set to NULL.
+		.preset = LZMA_PRESET_DEFAULT,
+		.filters = NULL,
+
+		// Use CRC64 for integrity checking. See also
+		// 01_compress_easy.c about choosing the integrity check.
+		.check = LZMA_CHECK_CRC64,
+	};
+
+	// Detect how many threads the CPU supports.
+	mt.threads = lzma_cputhreads();
+
+	// If the number of CPU cores/threads cannot be detected,
+	// use one thread. Note that this isn't the same as the normal
+	// single-threaded mode as this will still split the data into
+	// blocks and use more RAM than the normal single-threaded mode.
+	// You may want to consider using lzma_easy_encoder() or
+	// lzma_stream_encoder() instead of lzma_stream_encoder_mt() if
+	// lzma_cputhreads() returns 0 or 1.
+	if (mt.threads == 0)
+		mt.threads = 1;
+
+	// If the number of CPU cores/threads exceeds threads_max,
+	// limit the number of threads to keep memory usage lower.
+	// The number 8 is arbitrarily chosen and may be too low or
+	// high depending on the compression preset and the computer
+	// being used.
+	//
+	// FIXME: A better way could be to check the amount of RAM
+	// (or available RAM) and use lzma_stream_encoder_mt_memusage()
+	// to determine if the number of threads should be reduced.
+	const uint32_t threads_max = 8;
+	if (mt.threads > threads_max)
+		mt.threads = threads_max;
+
+	// Initialize the threaded encoder.
+	lzma_ret ret = lzma_stream_encoder_mt(strm, &mt);
+
+	if (ret == LZMA_OK)
+		return true;
+
+	const char *msg;
+	switch (ret) {
+	case LZMA_MEM_ERROR:
+		msg = "Memory allocation failed";
+		break;
+
+	case LZMA_OPTIONS_ERROR:
+		// NOTE: unlike 03_compress_custom.c, this example does use
+		// a preset (mt.filters is NULL), so "filter chain" here
+		// presumably means the chain selected by that preset.
+		msg = "Specified filter chain is not supported";
+		break;
+
+	case LZMA_UNSUPPORTED_CHECK:
+		msg = "Specified integrity check is not supported";
+		break;
+
+	default:
+		msg = "Unknown error, possibly a bug";
+		break;
+	}
+
+	fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
+			msg, ret);
+	return false;
+}
+
+
+// Identical to compress() in 01_compress_easy.c; see that file for comments.
+static bool
+compress(lzma_stream *strm, FILE *infile, FILE *outfile)
+{
+	lzma_action action = LZMA_RUN;
+
+	uint8_t inbuf[BUFSIZ];
+	uint8_t outbuf[BUFSIZ];
+
+	strm->next_in = NULL;
+	strm->avail_in = 0;
+	strm->next_out = outbuf;
+	strm->avail_out = sizeof(outbuf);
+
+	while (true) {
+		if (strm->avail_in == 0 && !feof(infile)) {
+			strm->next_in = inbuf;
+			strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
+					infile);
+
+			if (ferror(infile)) {
+				fprintf(stderr, "Read error: %s\n",
+						strerror(errno));
+				return false;
+			}
+
+			if (feof(infile))
+				action = LZMA_FINISH;
+		}
+
+		lzma_ret ret = lzma_code(strm, action);
+
+		if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
+			size_t write_size = sizeof(outbuf) - strm->avail_out;
+
+			if (fwrite(outbuf, 1, write_size, outfile)
+					!= write_size) {
+				fprintf(stderr, "Write error: %s\n",
+						strerror(errno));
+				return false;
+			}
+
+			strm->next_out = outbuf;
+			strm->avail_out = sizeof(outbuf);
+		}
+
+		if (ret != LZMA_OK) {
+			if (ret == LZMA_STREAM_END)
+				return true;
+
+			const char *msg;
+			switch (ret) {
+			case LZMA_MEM_ERROR:
+				msg = "Memory allocation failed";
+				break;
+
+			case LZMA_DATA_ERROR:
+				msg = "File size limits exceeded";
+				break;
+
+			default:
+				msg = "Unknown error, possibly a bug";
+				break;
+			}
+
+			fprintf(stderr, "Encoder error: %s (error code %u)\n",
+					msg, ret);
+			return false;
+		}
+	}
+}
+
+
+extern int
+main(void)
+{
+	lzma_stream stream = LZMA_STREAM_INIT;
+
+	// compress() runs only if the encoder initialization succeeded.
+	bool ok = init_encoder(&stream) && compress(&stream, stdin, stdout);
+
+	lzma_end(&stream);
+
+	// A failed fclose() means buffered output could not be written.
+	if (fclose(stdout) != 0) {
+		fprintf(stderr, "Write error: %s\n", strerror(errno));
+		ok = false;
+	}
+
+	return ok ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/doc/examples/Makefile b/doc/examples/Makefile
new file mode 100644
index 0000000..e8839d8
--- /dev/null
+++ b/doc/examples/Makefile
@@ -0,0 +1,25 @@
+#
+# Author: Lasse Collin
+#
+# This file has been put into the public domain.
+# You can do whatever you want with this file.
+#
+
+# c99 is the C compiler utility name defined by POSIX.
+CC = c99
+CFLAGS = -g
+# The build rule below places this after the source file on the command line.
+LDFLAGS = -llzma
+
+# Programs to build; each NAME is built from NAME.c by the .c rule below.
+PROGS = \
+	01_compress_easy \
+	02_decompress \
+	03_compress_custom \
+	04_compress_easy_mt \
+	11_file_info
+
+all: $(PROGS)
+
+# Single-suffix inference rule: build an executable directly from a .c file.
+.c:
+	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+# The leading "-" tells make to ignore a failure of the rm command.
+clean:
+	-rm -f $(PROGS)
diff --git a/doc/examples_old/xz_pipe_comp.c b/doc/examples_old/xz_pipe_comp.c
new file mode 100644
index 0000000..9f9224b
--- /dev/null
+++ b/doc/examples_old/xz_pipe_comp.c
@@ -0,0 +1,127 @@
+/*
+ * xz_pipe_comp.c
+ * A simple example of pipe-only xz compressor implementation.
+ * version: 2010-07-12 - by Daniel Mealha Cabrita
+ * Not copyrighted -- provided to the public domain.
+ *
+ * Compiling:
+ * Link with liblzma. GCC example:
+ * $ gcc xz_pipe_comp.c -o xz_pipe_comp -llzma
+ *
+ * Usage example:
+ * $ cat some_file | ./xz_pipe_comp > some_file.xz
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <lzma.h>
+
+
+/* COMPRESSION SETTINGS */
+
+/* analogous to xz CLI options: -0 to -9 */
+#define COMPRESSION_LEVEL 6
+
+/* boolean setting, analogous to xz CLI option: -e */
+#define COMPRESSION_EXTREME true
+
+/* see: /usr/include/lzma/check.h LZMA_CHECK_* */
+#define INTEGRITY_CHECK LZMA_CHECK_CRC64
+
+
+/* read/write buffer sizes */
+#define IN_BUF_MAX	4096
+#define OUT_BUF_MAX	4096
+
+/* error codes */
+#define RET_OK			0
+#define RET_ERROR_INIT		1
+#define RET_ERROR_INPUT		2
+#define RET_ERROR_OUTPUT	3
+#define RET_ERROR_COMPRESSION	4
+
+
+/*
+ * Compresses everything read from in_file into one .xz stream written
+ * to out_file, using COMPRESSION_LEVEL, COMPRESSION_EXTREME and
+ * INTEGRITY_CHECK defined above.
+ *
+ * note: in_file and out_file must be open already
+ *
+ * Returns RET_OK on success, otherwise:
+ *   RET_ERROR_INIT         lzma_easy_encoder() failed
+ *   RET_ERROR_INPUT        reading in_file failed (the data read before
+ *                          the error is still compressed and the stream
+ *                          is still finished)
+ *   RET_ERROR_OUTPUT       writing or flushing out_file failed
+ *   RET_ERROR_COMPRESSION  lzma_code() reported an error
+ */
+int xz_compress (FILE *in_file, FILE *out_file)
+{
+	uint32_t preset = COMPRESSION_LEVEL | (COMPRESSION_EXTREME ? LZMA_PRESET_EXTREME : 0);
+	lzma_check check = INTEGRITY_CHECK;
+	lzma_stream strm = LZMA_STREAM_INIT; /* alloc and init lzma_stream struct */
+	uint8_t in_buf [IN_BUF_MAX];
+	uint8_t out_buf [OUT_BUF_MAX];
+	size_t in_len;	/* length of useful data in in_buf */
+	size_t out_len;	/* length of useful data in out_buf */
+	bool in_finished = false;
+	bool out_finished = false;
+	lzma_action action;
+	lzma_ret ret_xz;
+	int ret;
+
+	ret = RET_OK;
+
+	/* initialize xz encoder */
+	ret_xz = lzma_easy_encoder (&strm, preset, check);
+	if (ret_xz != LZMA_OK) {
+		fprintf (stderr, "lzma_easy_encoder error: %d\n", (int) ret_xz);
+		return RET_ERROR_INIT;
+	}
+
+	while ((! in_finished) && (! out_finished)) {
+		/* read incoming data */
+		in_len = fread (in_buf, 1, IN_BUF_MAX, in_file);
+
+		if (feof (in_file)) {
+			in_finished = true;
+		}
+		if (ferror (in_file)) {
+			in_finished = true;
+			ret = RET_ERROR_INPUT;
+		}
+
+		strm.next_in = in_buf;
+		strm.avail_in = in_len;
+
+		/* if no more data from in_buf, flushes the
+		   internal xz buffers and closes the xz data
+		   with LZMA_FINISH */
+		action = in_finished ? LZMA_FINISH : LZMA_RUN;
+
+		/* loop until there's no pending compressed output,
+		   or until an error has been detected */
+		do {
+			/* out_buf is clean at this point */
+			strm.next_out = out_buf;
+			strm.avail_out = OUT_BUF_MAX;
+
+			/* compress data */
+			ret_xz = lzma_code (&strm, action);
+
+			if ((ret_xz != LZMA_OK) && (ret_xz != LZMA_STREAM_END)) {
+				fprintf (stderr, "lzma_code error: %d\n", (int) ret_xz);
+				out_finished = true;
+				ret = RET_ERROR_COMPRESSION;
+			} else {
+				/* write compressed data */
+				out_len = OUT_BUF_MAX - strm.avail_out;
+				fwrite (out_buf, 1, out_len, out_file);
+				if (ferror (out_file)) {
+					out_finished = true;
+					ret = RET_ERROR_OUTPUT;
+				}
+			}
+			/* stop right after an error: a full out_buf
+			   (avail_out == 0) must not trigger further
+			   coding or writing once out_finished is set */
+		} while ((strm.avail_out == 0) && (! out_finished));
+	}
+
+	/* data may still sit in the stdio buffer of out_file; flush it
+	   here so that a failed write is reported to the caller instead
+	   of being lost when the stream is closed later */
+	if ((ret == RET_OK) && (fflush (out_file) != 0)) {
+		ret = RET_ERROR_OUTPUT;
+	}
+
+	lzma_end (&strm);
+	return ret;
+}
+
+/*
+ * Compresses stdin to stdout. The exit status is one of the RET_* codes.
+ */
+int main (void)
+{
+	int ret;
+
+	ret = xz_compress (stdin, stdout);
+
+	/* stdout is buffered: a failure of the final flush (e.g. disk
+	   full) only shows up when the stream is closed, and it must not
+	   be reported as success. fclose() is always called; an earlier
+	   error code is kept. */
+	if ((fclose (stdout) != 0) && (ret == RET_OK)) {
+		ret = RET_ERROR_OUTPUT;
+	}
+
+	return ret;
+}
+
diff --git a/doc/examples_old/xz_pipe_decomp.c b/doc/examples_old/xz_pipe_decomp.c
new file mode 100644
index 0000000..fb5ad89
--- /dev/null
+++ b/doc/examples_old/xz_pipe_decomp.c
@@ -0,0 +1,123 @@
+/*
+ * xz_pipe_decomp.c
+ * A simple example of pipe-only xz decompressor implementation.
+ * version: 2012-06-14 - by Daniel Mealha Cabrita
+ * Not copyrighted -- provided to the public domain.
+ *
+ * Compiling:
+ * Link with liblzma. GCC example:
+ * $ gcc xz_pipe_decomp.c -o xz_pipe_decomp -llzma
+ *
+ * Usage example:
+ * $ cat some_file.xz | ./xz_pipe_decomp > some_file
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <lzma.h>
+
+
+/* read/write buffer sizes */
+#define IN_BUF_MAX	4096
+#define OUT_BUF_MAX	4096
+
+/* error codes */
+#define RET_OK			0
+#define RET_ERROR_INIT		1
+#define RET_ERROR_INPUT		2
+#define RET_ERROR_OUTPUT	3
+#define RET_ERROR_DECOMPRESSION	4
+
+
+/*
+ * Decompresses the .xz data read from in_file (concatenated streams
+ * are accepted, see LZMA_CONCATENATED below) and writes the result
+ * to out_file.
+ *
+ * note: in_file and out_file must be open already
+ *
+ * Returns RET_OK on success, otherwise:
+ *   RET_ERROR_INIT           lzma_stream_decoder() failed
+ *   RET_ERROR_INPUT          reading in_file failed
+ *   RET_ERROR_OUTPUT         writing or flushing out_file failed
+ *   RET_ERROR_DECOMPRESSION  lzma_code() reported an error, or the
+ *                            input ended before LZMA_STREAM_END
+ */
+int xz_decompress (FILE *in_file, FILE *out_file)
+{
+	lzma_stream strm = LZMA_STREAM_INIT; /* alloc and init lzma_stream struct */
+	const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED;
+	const uint64_t memory_limit = UINT64_MAX; /* no memory limit */
+	uint8_t in_buf [IN_BUF_MAX];
+	uint8_t out_buf [OUT_BUF_MAX];
+	size_t in_len;	/* length of useful data in in_buf */
+	size_t out_len;	/* length of useful data in out_buf */
+	bool in_finished = false;
+	bool out_finished = false;
+	lzma_action action;
+	lzma_ret ret_xz;
+	int ret;
+
+	ret = RET_OK;
+
+	/* initialize xz decoder */
+	ret_xz = lzma_stream_decoder (&strm, memory_limit, flags);
+	if (ret_xz != LZMA_OK) {
+		fprintf (stderr, "lzma_stream_decoder error: %d\n", (int) ret_xz);
+		return RET_ERROR_INIT;
+	}
+
+	while ((! in_finished) && (! out_finished)) {
+		/* read incoming data */
+		in_len = fread (in_buf, 1, IN_BUF_MAX, in_file);
+
+		if (feof (in_file)) {
+			in_finished = true;
+		}
+		if (ferror (in_file)) {
+			in_finished = true;
+			ret = RET_ERROR_INPUT;
+		}
+
+		strm.next_in = in_buf;
+		strm.avail_in = in_len;
+
+		/* once the end of the input has been reached, pass
+		   LZMA_FINISH so the decoder knows that no more
+		   input will follow */
+		action = in_finished ? LZMA_FINISH : LZMA_RUN;
+
+		/* loop until there's no pending decompressed output,
+		   or until an error has been detected */
+		do {
+			/* out_buf is clean at this point */
+			strm.next_out = out_buf;
+			strm.avail_out = OUT_BUF_MAX;
+
+			/* decompress data */
+			ret_xz = lzma_code (&strm, action);
+
+			if ((ret_xz != LZMA_OK) && (ret_xz != LZMA_STREAM_END)) {
+				fprintf (stderr, "lzma_code error: %d\n", (int) ret_xz);
+				out_finished = true;
+				ret = RET_ERROR_DECOMPRESSION;
+			} else {
+				/* write decompressed data */
+				out_len = OUT_BUF_MAX - strm.avail_out;
+				fwrite (out_buf, 1, out_len, out_file);
+				if (ferror (out_file)) {
+					out_finished = true;
+					ret = RET_ERROR_OUTPUT;
+				}
+			}
+			/* stop right after an error: a full out_buf
+			   (avail_out == 0) must not trigger further
+			   decoding or writing once out_finished is set */
+		} while ((strm.avail_out == 0) && (! out_finished));
+	}
+
+	/* Bug fix (2012-06-14): If no errors were detected, check
+	   that the last lzma_code() call returned LZMA_STREAM_END.
+	   If not, the file is probably truncated. */
+	if ((ret == RET_OK) && (ret_xz != LZMA_STREAM_END)) {
+		fprintf (stderr, "Input truncated or corrupt\n");
+		ret = RET_ERROR_DECOMPRESSION;
+	}
+
+	/* data may still sit in the stdio buffer of out_file; flush it
+	   here so that a failed write is reported to the caller instead
+	   of being lost when the stream is closed later */
+	if ((ret == RET_OK) && (fflush (out_file) != 0)) {
+		ret = RET_ERROR_OUTPUT;
+	}
+
+	lzma_end (&strm);
+	return ret;
+}
+
+/*
+ * Decompresses stdin to stdout. The exit status is one of the RET_* codes.
+ */
+int main (void)
+{
+	int ret;
+
+	ret = xz_decompress (stdin, stdout);
+
+	/* stdout is buffered: a failure of the final flush (e.g. disk
+	   full) only shows up when the stream is closed, and it must not
+	   be reported as success. fclose() is always called; an earlier
+	   error code is kept. */
+	if ((fclose (stdout) != 0) && (ret == RET_OK)) {
+		ret = RET_ERROR_OUTPUT;
+	}
+
+	return ret;
+}
+
diff --git a/doc/faq.txt b/doc/faq.txt
new file mode 100644
index 0000000..3f9068b
--- /dev/null
+++ b/doc/faq.txt
@@ -0,0 +1,244 @@
+
+XZ Utils FAQ
+============
+
+Q:  What do the letters XZ mean?
+
+A:  Nothing. They are just two letters, which come from the file format
+    suffix .xz. The .xz suffix was selected, because it seemed to be
+    pretty much unused. It has no deeper meaning.
+
+
+Q:  What are LZMA and LZMA2?
+
+A:  LZMA stands for Lempel-Ziv-Markov chain-Algorithm. It is the name
+    of the compression algorithm designed by Igor Pavlov for 7-Zip.
+    LZMA is based on LZ77 and range encoding.
+
+    LZMA2 is an updated version of the original LZMA to fix a couple of
+    practical issues. In context of XZ Utils, LZMA is called LZMA1 to
+    emphasize that LZMA is not the same thing as LZMA2. LZMA2 is the
+    primary compression algorithm in the .xz file format.
+
+
+Q:  There are many LZMA related projects. How does XZ Utils relate to them?
+
+A:  7-Zip and LZMA SDK are the original projects. LZMA SDK is roughly
+    a subset of the 7-Zip source tree.
+
+    p7zip is 7-Zip's command-line tools ported to POSIX-like systems.
+
+    LZMA Utils provide a gzip-like lzma tool for POSIX-like systems.
+    LZMA Utils are based on LZMA SDK. XZ Utils are the successor to
+    LZMA Utils.
+
+    There are several other projects using LZMA. Most are more or less
+    based on LZMA SDK. See <https://7-zip.org/links.html>.
+
+
+Q:  Why is liblzma named liblzma if its primary file format is .xz?
+    Shouldn't it be e.g. libxz?
+
+A:  When the designing of the .xz format began, the idea was to replace
+    the .lzma format and use the same .lzma suffix. It would have been
+    quite OK to reuse the suffix when there were very few .lzma files
+    around. However, the old .lzma format became popular before the
+    new format was finished. The new format was renamed to .xz but the
+    name of liblzma wasn't changed.
+
+
+Q:  Do XZ Utils support the .7z format?
+
+A:  No. Use 7-Zip (Windows) or p7zip (POSIX-like systems) to handle .7z
+    files.
+
+
+Q:  I have many .tar.7z files. Can I convert them to .tar.xz without
+    spending hours recompressing the data?
+
+A:  In the "extra" directory, there is a script named 7z2lzma.bash which
+    is able to convert some .7z files to the .lzma format (not .xz). It
+    needs the 7za (or 7z) command from p7zip. The script may silently
+    produce corrupt output if certain assumptions are not met, so
+    decompress the resulting .lzma file and compare it against the
+    original before deleting the original file!
+
+
+Q:  I have many .lzma files. Can I quickly convert them to the .xz format?
+
+A:  For now, no. Since XZ Utils supports the .lzma format, it's usually
+    not too bad to keep the old files in the old format. If you want to
+    do the conversion anyway, you need to decompress the .lzma files and
+    then recompress to the .xz format.
+
+    Technically, there is a way to make the conversion relatively fast
+    (roughly twice the time that normal decompression takes). Writing
+    such a tool would take quite a bit of time though, and would probably
+    be useful to only a few people. If you really want such a conversion
+    tool, contact Lasse Collin and offer some money.
+
+
+Q:  I have installed xz, but my tar doesn't recognize .tar.xz files.
+    How can I extract .tar.xz files?
+
+A:  xz -dc foo.tar.xz | tar xf -
+
+
+Q:  Can I recover parts of a broken .xz file (e.g. a corrupted CD-R)?
+
+A:  It may be possible if the file consists of multiple blocks, which
+    typically is not the case if the file was created in single-threaded
+    mode. There is no recovery program yet.
+
+
+Q:  Is (some part of) XZ Utils patented?
+
+A:  Lasse Collin is not aware of any patents that could affect XZ Utils.
+    However, due to the nature of software patents, it's not possible to
+    guarantee that XZ Utils isn't affected by any third party patent(s).
+
+
+Q:  Where can I find documentation about the file format and algorithms?
+
+A:  The .xz format is documented in xz-file-format.txt. It is a container
+    format only, and doesn't include descriptions of any non-trivial
+    filters.
+
+    Documenting LZMA and LZMA2 is planned, but for now, there is no other
+    documentation than the source code. Before you begin, you should know
+    the basics of LZ77 and range-coding algorithms. LZMA is based on LZ77,
+    but LZMA is a lot more complex. Range coding is used to compress
+    the final bitstream like Huffman coding is used in Deflate.
+
+
+Q:  I cannot find BCJ and BCJ2 filters. Don't they exist in liblzma?
+
+A:  BCJ filter is called "x86" in liblzma. BCJ2 is not included,
+    because it requires using more than one encoded output stream.
+
+
+Q:  I need to use a script that runs "xz -9". On a system with 256 MiB
+    of RAM, xz says that it cannot allocate memory. Can I make the
+    script work without modifying it?
+
+A:  Set a default memory usage limit for compression. You can do it e.g.
+    in a shell initialization script such as ~/.bashrc or /etc/profile:
+
+        XZ_DEFAULTS=--memlimit-compress=150MiB
+        export XZ_DEFAULTS
+
+    xz will then scale the compression settings down so that the given
+    memory usage limit is not reached. This way xz shouldn't run out
+    of memory.
+
+    Check also that memory-related resource limits are high enough.
+    On most systems, "ulimit -a" will show the current resource limits.
+
+
+Q:  How do I create files that can be decompressed with XZ Embedded?
+
+A:  See the documentation in XZ Embedded. In short, something like
+    this is a good start:
+
+        xz --check=crc32 --lzma2=preset=6e,dict=64KiB
+
+    Or if a BCJ filter is needed too, e.g. if compressing
+    a kernel image for PowerPC:
+
+        xz --check=crc32 --powerpc --lzma2=preset=6e,dict=64KiB
+
+    Adjust the dictionary size to get a good compromise between
+    compression ratio and decompressor memory usage. Note that
+    in single-call decompression mode of XZ Embedded, a big
+    dictionary doesn't increase memory usage.
+
+
+Q:  How is multi-threaded compression implemented in XZ Utils?
+
+A:  The simplest method is splitting the uncompressed data into blocks
+    and compressing them in parallel, independently of each other.
+    This is currently the only threading method supported in XZ Utils.
+    Since the blocks are compressed independently, they can also be
+    decompressed independently. Together with the index feature in .xz,
+    this allows using threads to create .xz files for random-access
+    reading. This also makes threaded decompression possible.
+
+    The independent blocks method has a couple of disadvantages too. It
+    will compress worse than a single-block method. Often the difference
+    is not too big (maybe 1-2 %) but sometimes it can be too big. Also,
+    the memory usage of the compressor increases linearly when adding
+    threads.
+
+    At least two other threading methods are possible but these haven't
+    been implemented in XZ Utils:
+
+    Match finder parallelization has been in 7-Zip for ages. It doesn't
+    affect compression ratio or memory usage significantly. Among the
+    three threading methods, only this is useful when compressing small
+    files (files that are not significantly bigger than the dictionary).
+    Unfortunately this method scales only to about two CPU cores.
+
+    The third method is pigz-style threading (I use that name, because
+    pigz <https://www.zlib.net/pigz/> uses that method). It doesn't
+    affect compression ratio significantly and scales to many cores.
+    The memory usage scales linearly when threads are added. This isn't
+    significant with pigz, because Deflate uses only a 32 KiB dictionary,
+    but with LZMA2 the memory usage will increase dramatically just like
+    with the independent-blocks method. There is also a constant
+    computational overhead, which may make pigz-method a bit dull on
+    dual-core compared to the parallel match finder method, but with more
+    cores the overhead is not a big deal anymore.
+
+    Combining the threading methods will be possible and also useful.
+    For example, combining match finder parallelization with pigz-style
+    threading or independent-blocks-threading can cut the memory usage
+    by 50 %.
+
+
+Q:  I told xz to use many threads but it is using only one or two
+    processor cores. What is wrong?
+
+A:  Since multi-threaded compression is done by splitting the data into
+    blocks that are compressed individually, if the input file is too
+    small for the block size, then many threads cannot be used. The
+    default block size increases when the compression level is
+    increased. For example, xz -6 uses 8 MiB LZMA2 dictionary and
+    24 MiB blocks, and xz -9 uses 64 MiB LZMA2 dictionary and 192 MiB
+    blocks. If the input file is 100 MiB, xz -6 can use five threads
+    of which one will finish quickly as it has only 4 MiB to compress.
+    However, for the same file, xz -9 can only use one thread.
+
+    One can adjust block size with --block-size=SIZE but making the
+    block size smaller than LZMA2 dictionary is a waste of RAM: using
+    xz -9 with 6 MiB blocks isn't any better than using xz -6 with
+    6 MiB blocks. The default settings use a block size bigger than
+    the LZMA2 dictionary size because this was seen as a reasonable
+    compromise between RAM usage and compression ratio.
+
+    When decompressing, the ability to use threads depends on how the
+    file was created. If it was created in multi-threaded mode then
+    it can be decompressed in multi-threaded mode too if there are
+    multiple blocks in the file.
+
+
+Q:  How do I build a program that needs liblzmadec (lzmadec.h)?
+
+A:  liblzmadec is part of LZMA Utils. XZ Utils has liblzma, but no
+    liblzmadec. The code using liblzmadec should be ported to use
+    liblzma instead. If you cannot or don't want to do that, download
+    LZMA Utils from <https://tukaani.org/lzma/>.
+
+
+Q:  The default build of liblzma is too big. How can I make it smaller?
+
+A:  Give --enable-small to the configure script. Use also appropriate
+    --enable or --disable options to include only those filter encoders
+    and decoders and integrity checks that you actually need. Use
+    CFLAGS=-Os (with GCC) or equivalent to tell your compiler to optimize
+    for size. See INSTALL for information about configure options.
+
+    If the result is still too big, take a look at XZ Embedded. It is
+    a separate project, which provides a limited but significantly
+    smaller XZ decoder implementation than XZ Utils. You can find it
+    at <https://tukaani.org/xz/embedded.html>.
+
diff --git a/doc/history.txt b/doc/history.txt
new file mode 100644
index 0000000..8545e23
--- /dev/null
+++ b/doc/history.txt
@@ -0,0 +1,150 @@
+
+History of LZMA Utils and XZ Utils
+==================================
+
+Tukaani distribution
+
+    In 2005, there was a small group working on the Tukaani distribution,
+    which was a Slackware fork. One of the project's goals was to fit the
+    distro on a single 700 MiB ISO-9660 image. Using LZMA instead of gzip
+    helped a lot. Roughly speaking, one could fit data that took 1000 MiB
+    in gzipped form into 700 MiB with LZMA. Naturally, the compression
+    ratio varied across packages, but this was what we got on average.
+
+    Slackware packages have traditionally had .tgz as the filename suffix,
+    which is an abbreviation of .tar.gz. A logical naming for LZMA
+    compressed packages was .tlz, being an abbreviation of .tar.lzma.
+
+    At the end of the year 2007, there was no distribution under the
+    Tukaani project anymore, but development of LZMA Utils was kept going.
+    Still, there were .tlz packages around, because at least Vector Linux
+    (a Slackware based distribution) used LZMA for its packages.
+
+    First versions of the modified pkgtools used the LZMA_Alone tool from
+    Igor Pavlov's LZMA SDK as is. It was fine, because users wouldn't need
+    to interact with LZMA_Alone directly. But people soon wanted to use
+    LZMA for other files too, and the interface of LZMA_Alone wasn't
+    comfortable for those used to gzip and bzip2.
+
+
+First steps of LZMA Utils
+
+    The first version of LZMA Utils (4.22.0) included a shell script called
+    lzmash. It was a wrapper that had a gzip-like command-line interface. It
+    used the LZMA_Alone tool from LZMA SDK to do all the real work. zgrep,
+    zdiff, and related scripts from gzip were adapted to work with LZMA and
+    were part of the first LZMA Utils release too.
+
+    LZMA Utils 4.22.0 included also lzmadec, which was a small (less than
+    10 KiB) decoder-only command-line tool. It was written on top of the
+    decoder-only C code found from the LZMA SDK. lzmadec was convenient in
+    situations where LZMA_Alone (a few hundred KiB) would be too big.
+
+    lzmash and lzmadec were written by Lasse Collin.
+
+
+Second generation
+
+    The lzmash script was an ugly and not very secure hack. The last
+    version of LZMA Utils to use lzmash was 4.27.1.
+
+    LZMA Utils 4.32.0beta1 introduced a new lzma command-line tool written
+    by Ville Koskinen. It was written in C++, and used the encoder and
+    decoder from C++ LZMA SDK with some little modifications. This tool
+    replaced both the lzmash script and the LZMA_Alone command-line tool
+    in LZMA Utils.
+
+    Introducing this new tool caused some temporary incompatibilities,
+    because the LZMA_Alone executable was simply named lzma like the new
+    command-line tool, but they had a completely different command-line
+    interface. The file format was still the same.
+
+    Lasse wrote liblzmadec, which was a small decoder-only library based
+    on the C code found from LZMA SDK. liblzmadec had an API similar to
+    zlib, although there were some significant differences, which made it
+    non-trivial to use it in some applications designed for zlib and
+    libbzip2.
+
+    The lzmadec command-line tool was converted to use liblzmadec.
+
+    Alexandre Sauvé helped convert the build system to use GNU
+    Autotools. This made it easier to test for certain less portable
+    features needed by the new command-line tool.
+
+    Since the new command-line tool never got completely finished (for
+    example, it didn't support the LZMA_OPT environment variable), the
+    intent was to not call 4.32.x stable. Similarly, liblzmadec wasn't
+    polished, but appeared to work well enough, so some people started
+    using it too.
+
+    Because the development of the third generation of LZMA Utils was
+    delayed considerably (3-4 years), the 4.32.x branch had to be kept
+    maintained. It got some bug fixes now and then, and finally it was
+    decided to call it stable, although most of the missing features were
+    never added.
+
+
+File format problems
+
+    The file format used by LZMA_Alone was primitive. It was designed with
+    embedded systems in mind, and thus provided only a minimal set of
+    features. The two biggest problems for non-embedded use were the lack
+    of magic bytes and an integrity check.
+
+    Igor and Lasse started developing a new file format with some help
+    from Ville Koskinen. Also Mark Adler, Mikko Pouru, H. Peter Anvin,
+    and Lars Wirzenius helped with some minor things at some point of the
+    development. Designing the new format took quite a long time (actually,
+    too long a time would be a more appropriate expression). It was mostly
+    because Lasse was quite slow at getting things done due to personal
+    reasons.
+
+    Originally the new format was supposed to use the same .lzma suffix
+    that was already used by the old file format. Switching to the new
+    format wouldn't have caused much trouble when the old format wasn't
+    used by many people. But since the development of the new format took
+    such a long time, the old format got quite popular, and it was decided
+    that the new file format must use a different suffix.
+
+    It was decided to use .xz as the suffix of the new file format. The
+    first stable .xz file format specification was finally released in
+    December 2008. In addition to fixing the most obvious problems of
+    the old .lzma format, the .xz format added some new features like
+    support for multiple filters (compression algorithms), filter chaining
+    (like piping on the command line), and limited random-access reading.
+
+    Currently the primary compression algorithm used in .xz is LZMA2.
+    It is an extension on top of the original LZMA to fix some practical
+    problems: LZMA2 adds support for flushing the encoder, uncompressed
+    chunks, eases stateful decoder implementations, and improves support
+    for multithreading. Since LZMA2 is better than the original LZMA, the
+    original LZMA is not supported in .xz.
+
+
+Transition to XZ Utils
+
+    The early versions of XZ Utils were called LZMA Utils. The first
+    releases were 4.42.0alphas. They dropped the rest of the C++ LZMA SDK.
+    The code was still directly based on LZMA SDK but ported to C and
+    converted from a callback API to a stateful API. Later, Igor Pavlov
+    made a C version of the LZMA encoder too; these ports from C++ to C
+    were independent in LZMA SDK and LZMA Utils.
+
+    The core of the new LZMA Utils was liblzma, a compression library with
+    a zlib-like API. liblzma supported both the old and new file format.
+    The gzip-like lzma command-line tool was rewritten to use liblzma.
+
+    The new LZMA Utils code base was renamed to XZ Utils when the name
+    of the new file format had been decided. The liblzma compression
+    library retained its name though, because changing it would have
+    caused unnecessary breakage in applications already using the early
+    liblzma snapshots.
+
+    The xz command-line tool can emulate the gzip-like lzma tool by
+    creating appropriate symlinks (e.g. lzma -> xz). Thus, practically
+    all scripts using the lzma tool from LZMA Utils will work as is with
+    XZ Utils (and will keep using the old .lzma format). Still, the .lzma
+    format is more or less deprecated. XZ Utils will keep supporting it,
+    but new applications should use the .xz format, and migrating old
+    applications to .xz is often a good idea too.
+
diff --git a/doc/lzma-file-format.txt b/doc/lzma-file-format.txt
new file mode 100644
index 0000000..4865def
--- /dev/null
+++ b/doc/lzma-file-format.txt
@@ -0,0 +1,173 @@
+
+The .lzma File Format
+=====================
+
+        0. Preface
+           0.1. Notices and Acknowledgements
+           0.2. Changes
+        1. File Format
+           1.1. Header
+                1.1.1. Properties
+                1.1.2. Dictionary Size
+                1.1.3. Uncompressed Size
+           1.2. LZMA Compressed Data
+        2. References
+
+
+0. Preface
+
+        This document describes the .lzma file format, which is
+        sometimes also called LZMA_Alone format. It is a legacy file
+        format, which is being or has been replaced by the .xz format.
+        The MIME type of the .lzma format is `application/x-lzma'.
+
+        The most commonly used software to handle .lzma files are
+        LZMA SDK, LZMA Utils, 7-Zip, and XZ Utils. This document
+        describes some of the differences between these implementations
+        and gives hints about what subset of the .lzma format is the most
+        portable.
+
+
+0.1. Notices and Acknowledgements
+
+        This file format was designed by Igor Pavlov for use in
+        LZMA SDK. This document was written by Lasse Collin
+        <lasse.collin@tukaani.org> using the documentation found
+        from the LZMA SDK.
+
+        This document has been put into the public domain.
+
+
+0.2. Changes
+
+        Last modified: 2022-07-13 21:00+0300
+
+        Compared to the previous version (2011-04-12 11:55+0300)
+        the section 1.1.3 was modified to allow End of Payload Marker
+        with a known Uncompressed Size.
+
+
+1. File Format
+
+        +-+-+-+-+-+-+-+-+-+-+-+-+-+==========================+
+        |         Header          |   LZMA Compressed Data   |
+        +-+-+-+-+-+-+-+-+-+-+-+-+-+==========================+
+
+        A .lzma format file consists of a 13-byte Header followed by
+        the LZMA Compressed Data.
+
+        Unlike the .gz, .bz2, and .xz formats, it is not possible to
+        concatenate multiple .lzma files as is and expect the
+        decompression tool to decode the resulting file as if it were
+        a single .lzma file.
+
+        For example, the command line tools from LZMA Utils and
+        LZMA SDK silently ignore all the data after the first .lzma
+        stream. In contrast, the command line tool from XZ Utils
+        considers the .lzma file to be corrupt if there is data after
+        the first .lzma stream.
+
+
+1.1. Header
+
+        +------------+----+----+----+----+--+--+--+--+--+--+--+--+
+        | Properties |  Dictionary Size  |   Uncompressed Size   |
+        +------------+----+----+----+----+--+--+--+--+--+--+--+--+
+
+
+1.1.1. Properties
+
+        The Properties field contains three properties. An abbreviation
+        is given in parentheses, followed by the value range of the
+        property. The field consists of
+
+            1) the number of literal context bits (lc, [0, 8]);
+            2) the number of literal position bits (lp, [0, 4]); and
+            3) the number of position bits (pb, [0, 4]).
+
+        The properties are encoded using the following formula:
+
+            Properties = (pb * 5 + lp) * 9 + lc
+
+        The following C code illustrates a straightforward way to
+        decode the Properties field:
+
+            uint8_t lc, lp, pb;
+            uint8_t prop = get_lzma_properties();
+            if (prop > (4 * 5 + 4) * 9 + 8)
+                return LZMA_PROPERTIES_ERROR;
+
+            pb = prop / (9 * 5);
+            prop -= pb * 9 * 5;
+            lp = prop / 9;
+            lc = prop - lp * 9;
+
+        XZ Utils has an additional requirement: lc + lp <= 4. Files
+        which don't follow this requirement cannot be decompressed
+        with XZ Utils. Usually this isn't a problem since the most
+        common lc/lp/pb values are 3/0/2. It is the only lc/lp/pb
+        combination that the files created by LZMA Utils can have,
+        but LZMA Utils can decompress files with any lc/lp/pb.
+
+
+1.1.2. Dictionary Size
+
+        Dictionary Size is stored as an unsigned 32-bit little endian
+        integer. Any 32-bit value is possible, but for maximum
+        portability, only sizes of 2^n and 2^n + 2^(n-1) should be
+        used.
+
+        LZMA Utils creates only files with dictionary size 2^n,
+        16 <= n <= 25. LZMA Utils can decompress files with any
+        dictionary size.
+
+        XZ Utils creates and decompresses .lzma files only with
+        dictionary sizes 2^n and 2^n + 2^(n-1). If some other
+        dictionary size is specified when compressing, the value
+        stored in the Dictionary Size field is rounded up, but the
+        specified value is still used in the actual compression code.
+
+
+1.1.3. Uncompressed Size
+
+        Uncompressed Size is stored as an unsigned 64-bit little endian
+        integer. A special value of 0xFFFF_FFFF_FFFF_FFFF indicates
+        that Uncompressed Size is unknown. End of Payload Marker (*)
+        is used if Uncompressed Size is unknown. End of Payload Marker
+        is allowed but rarely used if Uncompressed Size is known.
+        XZ Utils 5.2.5 and older don't support .lzma files that have
+        End of Payload Marker together with a known Uncompressed Size.
+
+        XZ Utils rejects files whose Uncompressed Size field specifies
+        a known size that is 256 GiB or more. This is to reject false
+        positives when trying to guess if the input file is in the
+        .lzma format. When Uncompressed Size is unknown, there is no
+        limit for the uncompressed size of the file.
+
+        (*) Some tools use the term End of Stream (EOS) marker
+            instead of End of Payload Marker.
+
+
+1.2. LZMA Compressed Data
+
+        Detailed description of the format of this field is out of
+        scope of this document.
+
+
+2. References
+
+        LZMA SDK - The original LZMA implementation
+        http://7-zip.org/sdk.html
+
+        7-Zip
+        http://7-zip.org/
+
+        LZMA Utils - LZMA adapted to POSIX-like systems
+        http://tukaani.org/lzma/
+
+        XZ Utils - The next generation of LZMA Utils
+        http://tukaani.org/xz/
+
+        The .xz file format - The successor of the .lzma format
+        http://tukaani.org/xz/xz-file-format.txt
+
diff --git a/doc/man/pdf-a4/lzmainfo-a4.pdf b/doc/man/pdf-a4/lzmainfo-a4.pdf
new file mode 100644
index 0000000..91f2561
--- /dev/null
+++ b/doc/man/pdf-a4/lzmainfo-a4.pdf
diff --git a/doc/man/pdf-a4/xz-a4.pdf b/doc/man/pdf-a4/xz-a4.pdf
new file mode 100644
index 0000000..6156cfe
--- /dev/null
+++ b/doc/man/pdf-a4/xz-a4.pdf
diff --git a/doc/man/pdf-a4/xzdec-a4.pdf b/doc/man/pdf-a4/xzdec-a4.pdf
new file mode 100644
index 0000000..b3a761a
--- /dev/null
+++ b/doc/man/pdf-a4/xzdec-a4.pdf
diff --git a/doc/man/pdf-a4/xzdiff-a4.pdf b/doc/man/pdf-a4/xzdiff-a4.pdf
new file mode 100644
index 0000000..084cb80
--- /dev/null
+++ b/doc/man/pdf-a4/xzdiff-a4.pdf
diff --git a/doc/man/pdf-a4/xzgrep-a4.pdf b/doc/man/pdf-a4/xzgrep-a4.pdf
new file mode 100644
index 0000000..e3c54fb
--- /dev/null
+++ b/doc/man/pdf-a4/xzgrep-a4.pdf
diff --git a/doc/man/pdf-a4/xzless-a4.pdf b/doc/man/pdf-a4/xzless-a4.pdf
new file mode 100644
index 0000000..01176ed
--- /dev/null
+++ b/doc/man/pdf-a4/xzless-a4.pdf
diff --git a/doc/man/pdf-a4/xzmore-a4.pdf b/doc/man/pdf-a4/xzmore-a4.pdf
new file mode 100644
index 0000000..f829bf4
--- /dev/null
+++ b/doc/man/pdf-a4/xzmore-a4.pdf
diff --git a/doc/man/pdf-letter/lzmainfo-letter.pdf b/doc/man/pdf-letter/lzmainfo-letter.pdf
new file mode 100644
index 0000000..6f2ae17
--- /dev/null
+++ b/doc/man/pdf-letter/lzmainfo-letter.pdf
diff --git a/doc/man/pdf-letter/xz-letter.pdf b/doc/man/pdf-letter/xz-letter.pdf
new file mode 100644
index 0000000..9a733d9
--- /dev/null
+++ b/doc/man/pdf-letter/xz-letter.pdf
diff --git a/doc/man/pdf-letter/xzdec-letter.pdf b/doc/man/pdf-letter/xzdec-letter.pdf
new file mode 100644
index 0000000..89b2ded
--- /dev/null
+++ b/doc/man/pdf-letter/xzdec-letter.pdf
diff --git a/doc/man/pdf-letter/xzdiff-letter.pdf b/doc/man/pdf-letter/xzdiff-letter.pdf
new file mode 100644
index 0000000..143e979
--- /dev/null
+++ b/doc/man/pdf-letter/xzdiff-letter.pdf
diff --git a/doc/man/pdf-letter/xzgrep-letter.pdf b/doc/man/pdf-letter/xzgrep-letter.pdf
new file mode 100644
index 0000000..54fc897
--- /dev/null
+++ b/doc/man/pdf-letter/xzgrep-letter.pdf
diff --git a/doc/man/pdf-letter/xzless-letter.pdf b/doc/man/pdf-letter/xzless-letter.pdf
new file mode 100644
index 0000000..0e48e01
--- /dev/null
+++ b/doc/man/pdf-letter/xzless-letter.pdf
diff --git a/doc/man/pdf-letter/xzmore-letter.pdf b/doc/man/pdf-letter/xzmore-letter.pdf
new file mode 100644
index 0000000..1a019e0
--- /dev/null
+++ b/doc/man/pdf-letter/xzmore-letter.pdf
diff --git a/doc/man/txt/lzmainfo.txt b/doc/man/txt/lzmainfo.txt
new file mode 100644
index 0000000..fa4e51c
--- /dev/null
+++ b/doc/man/txt/lzmainfo.txt
@@ -0,0 +1,40 @@
+LZMAINFO(1)                        XZ Utils                        LZMAINFO(1)
+
+
+
+NAME
+       lzmainfo - show information stored in the .lzma file header
+
+SYNOPSIS
+       lzmainfo [--help] [--version] [file...]
+
+DESCRIPTION
+       lzmainfo  shows  information stored in the .lzma file header.  It reads
+       the first 13 bytes from the specified file,  decodes  the  header,  and
+       prints it to standard output in human readable format.  If no files are
+       given or file is -, standard input is read.
+
+       Usually the most interesting information is the uncompressed  size  and
+       the  dictionary  size.  Uncompressed size can be shown only if the file
+       is in the non-streamed .lzma format variant.  The amount of memory  re-
+       quired to decompress the file is a few dozen kilobytes plus the dictio-
+       nary size.
+
+       lzmainfo is included in XZ Utils primarily for  backward  compatibility
+       with LZMA Utils.
+
+EXIT STATUS
+       0      All is good.
+
+       1      An error occurred.
+
+BUGS
+       lzmainfo  uses  MB  while the correct suffix would be MiB (2^20 bytes).
+       This is to keep the output compatible with LZMA Utils.
+
+SEE ALSO
+       xz(1)
+
+
+
+Tukaani                           2013-06-30                       LZMAINFO(1)
diff --git a/doc/man/txt/xz.txt b/doc/man/txt/xz.txt
new file mode 100644
index 0000000..be24360
--- /dev/null
+++ b/doc/man/txt/xz.txt
@@ -0,0 +1,1589 @@
+XZ(1)                              XZ Utils                              XZ(1)
+
+
+
+NAME
+       xz,  unxz,  xzcat, lzma, unlzma, lzcat - Compress or decompress .xz and
+       .lzma files
+
+SYNOPSIS
+       xz [option...]  [file...]
+
+COMMAND ALIASES
+       unxz is equivalent to xz --decompress.
+       xzcat is equivalent to xz --decompress --stdout.
+       lzma is equivalent to xz --format=lzma.
+       unlzma is equivalent to xz --format=lzma --decompress.
+       lzcat is equivalent to xz --format=lzma --decompress --stdout.
+
+       When writing scripts that need to decompress files, it  is  recommended
+       to  always use the name xz with appropriate arguments (xz -d or xz -dc)
+       instead of the names unxz and xzcat.
+
+DESCRIPTION
+       xz is a general-purpose data compression tool with command line  syntax
+       similar  to  gzip(1)  and  bzip2(1).  The native file format is the .xz
+       format, but the legacy .lzma format used by LZMA  Utils  and  raw  com-
+       pressed  streams  with  no container format headers are also supported.
+       In addition, decompression of the .lz format used by lzip is supported.
+
+       xz compresses or decompresses each file according to the selected oper-
+       ation mode.  If no files are given or file is -, xz reads from standard
+       input and writes the processed data to standard output.  xz will refuse
+       (display  an error and skip the file) to write compressed data to stan-
+       dard output if it is a terminal.  Similarly, xz  will  refuse  to  read
+       compressed data from standard input if it is a terminal.
+
+       Unless  --stdout  is specified, files other than - are written to a new
+       file whose name is derived from the source file name:
+
+       o  When compressing, the suffix of  the  target  file  format  (.xz  or
+          .lzma)  is  appended  to the source filename to get the target file-
+          name.
+
+       o  When decompressing, the .xz, .lzma, or .lz suffix  is  removed  from
+          the  filename  to  get  the target filename.  xz also recognizes the
+          suffixes .txz and .tlz, and replaces them with the .tar suffix.
+
+       If the target file already exists, an error is displayed and  the  file
+       is skipped.
+
+       Unless  writing  to standard output, xz will display a warning and skip
+       the file if any of the following applies:
+
+       o  File is not a regular file.  Symbolic links are  not  followed,  and
+          thus they are not considered to be regular files.
+
+       o  File has more than one hard link.
+
+       o  File has setuid, setgid, or sticky bit set.
+
+       o  The  operation  mode  is  set to compress and the file already has a
+          suffix of the target file format (.xz or .txz  when  compressing  to
+          the .xz format, and .lzma or .tlz when compressing to the .lzma for-
+          mat).
+
+       o  The operation mode is set to decompress and the file doesn't have  a
+          suffix of any of the supported file formats (.xz, .txz, .lzma, .tlz,
+          or .lz).
+
+       After successfully compressing or decompressing the file, xz copies the
+       owner,  group, permissions, access time, and modification time from the
+       source file to the target file.  If copying the group fails,  the  per-
+       missions are modified so that the target file doesn't become accessible
+       to users who didn't have permission to  access  the  source  file.   xz
+       doesn't support copying other metadata like access control lists or ex-
+       tended attributes yet.
+
+       Once the target file has been successfully closed, the source  file  is
+       removed  unless --keep was specified.  The source file is never removed
+       if the output is written to standard output or if an error occurs.
+
+       Sending SIGINFO or SIGUSR1 to the xz process makes  it  print  progress
+       information  to  standard  error.  This has only limited use since when
+       standard error is a terminal, using --verbose will display an automati-
+       cally updating progress indicator.
+
+   Memory usage
+       The  memory  usage of xz varies from a few hundred kilobytes to several
+       gigabytes depending on the compression  settings.   The  settings  used
+       when compressing a file determine the memory requirements of the decom-
+       pressor.  Typically the decompressor needs 5 % to 20 % of the amount of
+       memory that the compressor needed when creating the file.  For example,
+       decompressing a file created with xz -9 currently  requires  65 MiB  of
+       memory.   Still,  it is possible to have .xz files that require several
+       gigabytes of memory to decompress.
+
+       Especially users of older systems may  find  the  possibility  of  very
+       large  memory  usage  annoying.  To prevent uncomfortable surprises, xz
+       has a built-in memory usage limiter,  which  is  disabled  by  default.
+       While  some operating systems provide ways to limit the memory usage of
+       processes, relying on it wasn't deemed to be flexible enough (for exam-
+       ple, using ulimit(1) to limit virtual memory tends to cripple mmap(2)).
+
+       The  memory  usage  limiter can be enabled with the command line option
+       --memlimit=limit.  Often it is more convenient to enable the limiter by
+       default  by  setting the environment variable XZ_DEFAULTS, for example,
+       XZ_DEFAULTS=--memlimit=150MiB.  It is possible to set the limits  sepa-
+       rately  for  compression  and  decompression  by  using --memlimit-com-
+       press=limit and --memlimit-decompress=limit.  Using these  two  options
+       outside  XZ_DEFAULTS is rarely useful because a single run of xz cannot
+       do both compression  and  decompression  and  --memlimit=limit  (or  -M
+       limit) is shorter to type on the command line.
+
+       If  the specified memory usage limit is exceeded when decompressing, xz
+       will display an error and decompressing the file  will  fail.   If  the
+       limit  is  exceeded when compressing, xz will try to scale the settings
+       down so that the limit is no longer exceeded (except when using  --for-
+       mat=raw  or --no-adjust).  This way the operation won't fail unless the
+       limit is very small.  The scaling of the settings is done in steps that
+       don't match the compression level presets, for example, if the limit is
+       only slightly less than the amount required for  xz  -9,  the  settings
+       will be scaled down only a little, not all the way down to xz -8.
+
+   Concatenation and padding with .xz files
+       It is possible to concatenate .xz files as is.  xz will decompress such
+       files as if they were a single .xz file.
+
+       It is possible to insert padding between the concatenated parts or  af-
+       ter the last part.  The padding must consist of null bytes and the size
+       of the padding must be a multiple of four bytes.  This can  be  useful,
+       for  example,  if the .xz file is stored on a medium that measures file
+       sizes in 512-byte blocks.
+
+       Concatenation and padding are not  allowed  with  .lzma  files  or  raw
+       streams.
+
+OPTIONS
+   Integer suffixes and special values
+       In  most places where an integer argument is expected, an optional suf-
+       fix is supported to easily indicate large integers.  There must  be  no
+       space between the integer and the suffix.
+
+       KiB    Multiply  the integer by 1,024 (2^10).  Ki, k, kB, K, and KB are
+              accepted as synonyms for KiB.
+
+       MiB    Multiply the integer by 1,048,576 (2^20).  Mi, m, M, and MB  are
+              accepted as synonyms for MiB.
+
+       GiB    Multiply  the integer by 1,073,741,824 (2^30).  Gi, g, G, and GB
+              are accepted as synonyms for GiB.
+
+       The special value max can be used to indicate the maximum integer value
+       supported by the option.
+
+   Operation mode
+       If  multiple  operation  mode options are given, the last one takes ef-
+       fect.
+
+       -z, --compress
+              Compress.  This is the default operation mode when no  operation
+              mode  option is specified and no other operation mode is implied
+              from the command name (for example, unxz implies --decompress).
+
+       -d, --decompress, --uncompress
+              Decompress.
+
+       -t, --test
+              Test the integrity of compressed files.  This option is  equiva-
+              lent  to --decompress --stdout except that the decompressed data
+              is discarded instead of being written to  standard  output.   No
+              files are created or removed.
+
+       -l, --list
+              Print  information about compressed files.  No uncompressed out-
+              put is produced, and no files are created or removed.   In  list
+              mode,  the program cannot read the compressed data from standard
+              input or from other unseekable sources.
+
+              The default listing shows basic  information  about  files,  one
+              file  per  line.  To get more detailed information, use also the
+              --verbose option.  For  even  more  information,  use  --verbose
+              twice,  but  note that this may be slow, because getting all the
+              extra information requires many seeks.   The  width  of  verbose
+              output exceeds 80 characters, so piping the output to, for exam-
+              ple, less -S may  be  convenient  if  the  terminal  isn't  wide
+              enough.
+
+              The  exact output may vary between xz versions and different lo-
+              cales.  For machine-readable output, --robot  --list  should  be
+              used.
+
+   Operation modifiers
+       -k, --keep
+              Don't delete the input files.
+
+              Since xz 5.2.6, this option also makes xz compress or decompress
+              even if the input is a symbolic link to a regular file, has more
+              than  one  hard  link,  or has the setuid, setgid, or sticky bit
+              set.  The setuid, setgid, and sticky bits are not copied to  the
+              target  file.   In  earlier  versions  this  was  only done with
+              --force.
+
+       -f, --force
+              This option has several effects:
+
+              o  If the target file already exists, delete it before compress-
+                 ing or decompressing.
+
+              o  Compress  or  decompress even if the input is a symbolic link
+                 to a regular file, has more than one hard link,  or  has  the
+                 setuid,  setgid,  or sticky bit set.  The setuid, setgid, and
+                 sticky bits are not copied to the target file.
+
+              o  When used with --decompress --stdout and xz cannot  recognize
+                 the  type  of  the source file, copy the source file as is to
+                 standard output.  This allows xzcat --force to be  used  like
+                 cat(1) for files that have not been compressed with xz.  Note
+                 that in future, xz might support new compressed file formats,
+                 which  may  make xz decompress more types of files instead of
+                 copying them as is to standard output.   --format=format  can
+                 be  used to restrict xz to decompress only a single file for-
+                 mat.
+
+       -c, --stdout, --to-stdout
+              Write the compressed or decompressed data to standard output in-
+              stead of a file.  This implies --keep.
+
+       --single-stream
+              Decompress only the first .xz stream, and silently ignore possi-
+              ble remaining input data following the  stream.   Normally  such
+              trailing garbage makes xz display an error.
+
+              xz  never  decompresses more than one stream from .lzma files or
+              raw streams, but this option still makes xz ignore the  possible
+              trailing data after the .lzma file or raw stream.
+
+              This  option has no effect if the operation mode is not --decom-
+              press or --test.
+
+       --no-sparse
+              Disable creation of sparse files.  By default, if  decompressing
+              into a regular file, xz tries to make the file sparse if the de-
+              compressed data contains long sequences  of  binary  zeros.   It
+              also  works  when writing to standard output as long as standard
+              output is connected to a regular  file  and  certain  additional
+              conditions  are  met to make it safe.  Creating sparse files may
+              save disk space and speed up the decompression by  reducing  the
+              amount of disk I/O.
+
+       -S .suf, --suffix=.suf
+              When compressing, use .suf as the suffix for the target file in-
+              stead of .xz or .lzma.  If not writing to  standard  output  and
+              the  source  file already has the suffix .suf, a warning is dis-
+              played and the file is skipped.
+
+              When decompressing, recognize files with the suffix .suf in  ad-
+              dition  to files with the .xz, .txz, .lzma, .tlz, or .lz suffix.
+              If the source file has the suffix .suf, the suffix is removed to
+              get the target filename.
+
+              When  compressing  or  decompressing raw streams (--format=raw),
+              the suffix must always be specified unless writing  to  standard
+              output, because there is no default suffix for raw streams.
+
+       --files[=file]
+              Read  the  filenames  to  process from file; if file is omitted,
+              filenames are read from standard input.  Filenames must be  ter-
+              minated  with  the  newline character.  A dash (-) is taken as a
+              regular filename; it doesn't mean standard input.  If  filenames
+              are given also as command line arguments, they are processed be-
+              fore the filenames read from file.
+
+       --files0[=file]
+              This is identical to --files[=file] except  that  each  filename
+              must be terminated with the null character.
+
+   Basic file format and compression options
+       -F format, --format=format
+              Specify the file format to compress or decompress:
+
+              auto   This  is  the default.  When compressing, auto is equiva-
+                     lent to xz.  When decompressing, the format of the  input
+                     file  is  automatically  detected.  Note that raw streams
+                     (created with --format=raw) cannot be auto-detected.
+
+              xz     Compress to the .xz file format, or accept only .xz files
+                     when decompressing.
+
+              lzma, alone
+                     Compress  to the legacy .lzma file format, or accept only
+                     .lzma files when  decompressing.   The  alternative  name
+                     alone  is  provided for backwards compatibility with LZMA
+                     Utils.
+
+              lzip   Accept only .lz files when decompressing.  Compression is
+                     not supported.
+
+                     The .lz format version 0 and the unextended version 1 are
+                     supported.  Version 0 files were produced by lzip 1.3 and
+                     older.   Such  files  aren't common but may be found from
+                     file archives as a few source packages were  released  in
+                     this  format.   People  might  have old personal files in
+                     this format too.  Decompression support  for  the  format
+                     version 0 was removed in lzip 1.18.
+
+                     lzip  1.4 and later create files in the format version 1.
+                     The sync flush marker extension to the format  version  1
+                     was added in lzip 1.6.  This extension is rarely used and
+                     isn't supported by xz (diagnosed as corrupt input).
+
+              raw    Compress or uncompress a raw stream (no  headers).   This
+                     is meant for advanced users only.  To decode raw streams,
+                     you need to use --format=raw and explicitly specify the
+                     filter chain, which normally would have been stored in
+                     the container headers.
+
+       -C check, --check=check
+              Specify the type of the integrity check.  The  check  is  calcu-
+              lated  from  the  uncompressed  data and stored in the .xz file.
+              This option has an effect only when  compressing  into  the  .xz
+              format;  the .lzma format doesn't support integrity checks.  The
+              integrity check (if any) is verified when the .xz file is decom-
+              pressed.
+
+              Supported check types:
+
+              none   Don't  calculate an integrity check at all.  This is usu-
+                     ally a bad idea.  This can be useful  when  integrity  of
+                     the data is verified by other means anyway.
+
+              crc32  Calculate  CRC32  using  the  polynomial  from IEEE-802.3
+                     (Ethernet).
+
+              crc64  Calculate CRC64 using the polynomial from ECMA-182.  This
+                     is the default, since it is slightly better than CRC32 at
+                     detecting damaged files and the speed difference is  neg-
+                     ligible.
+
+              sha256 Calculate  SHA-256.   This  is somewhat slower than CRC32
+                     and CRC64.
+
+              Integrity of the .xz headers is always verified with CRC32.   It
+              is not possible to change or disable it.
+
+       --ignore-check
+              Don't verify the integrity check of the compressed data when de-
+              compressing.  The CRC32 values in the .xz headers will still  be
+              verified normally.
+
+              Do not use this option unless you know what you are doing.  Pos-
+              sible reasons to use this option:
+
+              o  Trying to recover data from a corrupt .xz file.
+
+              o  Speeding up decompression.  This matters mostly with  SHA-256
+                 or with files that have compressed extremely well.  It's rec-
+                 ommended to not use this option for this purpose  unless  the
+                 file integrity is verified externally in some other way.
+
+       -0 ... -9
+              Select  a compression preset level.  The default is -6.  If mul-
+              tiple preset levels are specified, the last  one  takes  effect.
+              If  a  custom filter chain was already specified, setting a com-
+              pression preset level clears the custom filter chain.
+
+              The differences between the presets are  more  significant  than
+              with  gzip(1)  and  bzip2(1).  The selected compression settings
+              determine the memory requirements of the decompressor, thus  us-
+              ing  a too high preset level might make it painful to decompress
+              the file on an old system with little RAM.   Specifically,  it's
+              not  a  good idea to blindly use -9 for everything like it often
+              is with gzip(1) and bzip2(1).
+
+              -0 ... -3
+                     These are somewhat fast presets.  -0 is sometimes  faster
+                     than  gzip  -9 while compressing much better.  The higher
+                     ones often have speed comparable to bzip2(1) with  compa-
+                     rable  or  better compression ratio, although the results
+                     depend a lot on the type of data being compressed.
+
+              -4 ... -6
+                     Good to very good compression while keeping  decompressor
+                     memory  usage reasonable even for old systems.  -6 is the
+                     default, which is usually a good choice for  distributing
+                     files that need to be decompressible even on systems with
+                     only 16 MiB RAM.  (-5e or -6e may  be  worth  considering
+                     too.  See --extreme.)
+
+              -7 ... -9
+                     These  are  like -6 but with higher compressor and decom-
+                     pressor memory requirements.  These are useful only  when
+                     compressing  files bigger than 8 MiB, 16 MiB, and 32 MiB,
+                     respectively.
+
+              On the same hardware, the decompression speed is approximately a
+              constant  number  of  bytes  of  compressed data per second.  In
+              other words, the better the compression, the faster  the  decom-
+              pression  will  usually  be.  This also means that the amount of
+              uncompressed output produced per second can vary a lot.
+
+              The following table summarises the features of the presets:
+
+                     Preset   DictSize   CompCPU   CompMem   DecMem
+                       -0     256 KiB       0        3 MiB    1 MiB
+                       -1       1 MiB       1        9 MiB    2 MiB
+                       -2       2 MiB       2       17 MiB    3 MiB
+                       -3       4 MiB       3       32 MiB    5 MiB
+                       -4       4 MiB       4       48 MiB    5 MiB
+                       -5       8 MiB       5       94 MiB    9 MiB
+                       -6       8 MiB       6       94 MiB    9 MiB
+                       -7      16 MiB       6      186 MiB   17 MiB
+                       -8      32 MiB       6      370 MiB   33 MiB
+                       -9      64 MiB       6      674 MiB   65 MiB
+
+              Column descriptions:
+
+              o  DictSize is the LZMA2 dictionary size.  It is a waste of
+                 memory to use a dictionary bigger than the size of the
+                 uncompressed file.  This is why it is good to avoid using the
+                 presets -7 ... -9 when there's no real need for them.  At -6
+                 and lower, the amount of memory wasted is usually low enough
+                 to not matter.
+
+              o  CompCPU  is a simplified representation of the LZMA2 settings
+                 that affect compression speed.  The dictionary  size  affects
+                 speed too, so while CompCPU is the same for levels -6 ... -9,
+                 higher levels still tend to be a little slower.  To get  even
+                 slower and thus possibly better compression, see --extreme.
+
+              o  CompMem  contains  the  compressor memory requirements in the
+                 single-threaded mode.  It may vary slightly between  xz  ver-
+                 sions.   Memory  requirements  of  some  of the future multi-
+                 threaded modes may be dramatically higher than  that  of  the
+                 single-threaded mode.
+
+              o  DecMem  contains  the decompressor memory requirements.  That
+                 is, the compression settings determine  the  memory  require-
+                 ments of the decompressor.  The exact decompressor memory us-
+                 age is slightly more than the LZMA2 dictionary size, but  the
+                 values  in  the  table  have been rounded up to the next full
+                 MiB.
+
+       -e, --extreme
+              Use a slower variant of the selected  compression  preset  level
+              (-0 ... -9) to hopefully get a little bit better compression ra-
+              tio, but with bad luck this can also make it worse.   Decompres-
+              sor  memory  usage  is not affected, but compressor memory usage
+              increases a little at preset levels -0 ... -3.
+
+              Since there are two presets  with  dictionary  sizes  4 MiB  and
+              8 MiB,  the  presets  -3e  and  -5e use slightly faster settings
+              (lower CompCPU) than -4e and -6e, respectively.  That way no two
+              presets are identical.
+
+                     Preset   DictSize   CompCPU   CompMem   DecMem
+                      -0e     256 KiB       8        4 MiB    1 MiB
+                      -1e       1 MiB       8       13 MiB    2 MiB
+                      -2e       2 MiB       8       25 MiB    3 MiB
+                      -3e       4 MiB       7       48 MiB    5 MiB
+                      -4e       4 MiB       8       48 MiB    5 MiB
+                      -5e       8 MiB       7       94 MiB    9 MiB
+                      -6e       8 MiB       8       94 MiB    9 MiB
+                      -7e      16 MiB       8      186 MiB   17 MiB
+                      -8e      32 MiB       8      370 MiB   33 MiB
+                      -9e      64 MiB       8      674 MiB   65 MiB
+
+              For  example,  there  are a total of four presets that use 8 MiB
+              dictionary, whose order from the fastest to the slowest  is  -5,
+              -6, -5e, and -6e.
+
+       --fast
+       --best These  are  somewhat  misleading  aliases for -0 and -9, respec-
+              tively.  These are provided  only  for  backwards  compatibility
+              with LZMA Utils.  Avoid using these options.
+
+       --block-size=size
+              When  compressing  to  the .xz format, split the input data into
+              blocks of size bytes.  The blocks are  compressed  independently
+              from each other, which helps with multi-threading and makes lim-
+              ited random-access decompression possible.  This option is typi-
+              cally  used to override the default block size in multi-threaded
+              mode, but this option can be used in single-threaded mode too.
+
+              In multi-threaded mode about three times size bytes will be  al-
+              located  in each thread for buffering input and output.  The de-
+              fault size is three times the LZMA2 dictionary size  or  1  MiB,
+              whichever is more.  Typically a good value is 2-4 times the size
+              of the LZMA2 dictionary or at least 1 MiB.  Using size less than
+              the LZMA2 dictionary size is a waste of RAM because then the
+              LZMA2 dictionary buffer will never get fully used.  The sizes of
+              the blocks are stored in the block headers, which a future
+              version of xz will use for multi-threaded decompression.
+
+              In single-threaded mode no block splitting is done  by  default.
+              Setting this option doesn't affect memory usage.  No size infor-
+              mation is stored in block headers, thus files created in single-
+              threaded  mode  won't  be  identical  to files created in multi-
+              threaded mode.  The lack of size information also means  that  a
+              future version of xz won't be able to decompress the files in
+              multi-threaded mode.
+
+       --block-list=sizes
+              When compressing to the .xz format, start a new block after  the
+              given intervals of uncompressed data.
+
+              The  uncompressed  sizes of the blocks are specified as a comma-
+              separated list.  Omitting a size (two or more  consecutive  com-
+              mas) is a shorthand to use the size of the previous block.
+
+              If  the  input  file  is  bigger than the sum of sizes, the last
+              value in sizes is repeated until the end of the file.  A special
+              value  of  0  may be used as the last value to indicate that the
+              rest of the file should be encoded as a single block.
+
+              If one specifies sizes that exceed the encoder's block size (ei-
+              ther  the  default value in threaded mode or the value specified
+              with --block-size=size),  the  encoder  will  create  additional
+              blocks while keeping the boundaries specified in sizes.  For ex-
+              ample,      if      one       specifies       --block-size=10MiB
+              --block-list=5MiB,10MiB,8MiB,12MiB,24MiB  and  the input file is
+              80 MiB, one will get 11 blocks: 5, 10, 8, 10, 2, 10, 10, 4,  10,
+              10, and 1 MiB.
+
+              In multi-threaded mode the sizes of the blocks are stored in the
+              block headers.  This isn't done in single-threaded mode, so  the
+              encoded  output won't be identical to that of the multi-threaded
+              mode.
+
+       --flush-timeout=timeout
+              When compressing, if more than timeout milliseconds (a  positive
+              integer)  has  passed  since the previous flush and reading more
+              input would block, all the pending input data  is  flushed  from
+              the  encoder  and made available in the output stream.  This can
+              be useful if xz is used to compress data that is streamed over a
+              network.   Small  timeout  values make the data available at the
+              receiving end with a small delay, but large timeout values  give
+              better compression ratio.
+
+              This  feature  is disabled by default.  If this option is speci-
+              fied more than once, the last one  takes  effect.   The  special
+              timeout  value  of 0 can be used to explicitly disable this fea-
+              ture.
+
+              This feature is not available on non-POSIX systems.
+
+              This feature is still experimental.  Currently xz is  unsuitable
+              for  decompressing  the  stream  in real time due to how xz does
+              buffering.
+
+       --memlimit-compress=limit
+              Set a memory usage limit for compression.   If  this  option  is
+              specified multiple times, the last one takes effect.
+
+              If the compression settings exceed the limit, xz will attempt to
+              adjust the settings downwards so that the limit is no longer ex-
+              ceeded  and display a notice that automatic adjustment was done.
+              The adjustments are done in this order: reducing the  number  of
+              threads, switching to single-threaded mode if even one thread in
+              multi-threaded mode exceeds the limit, and finally reducing  the
+              LZMA2 dictionary size.
+
+              When  compressing  with  --format=raw or if --no-adjust has been
+              specified, only the number of threads may be  reduced  since  it
+              can be done without affecting the compressed output.
+
+              If  the  limit cannot be met even with the adjustments described
+              above, an error is displayed and xz will exit with  exit  status
+              1.
+
+              The limit can be specified in multiple ways:
+
+              o  The  limit can be an absolute value in bytes.  Using an inte-
+                 ger suffix like MiB can be useful.  Example:  --memlimit-com-
+                 press=80MiB
+
+              o  The  limit can be specified as a percentage of total physical
+                 memory (RAM).  This can be useful especially when setting the
+                 XZ_DEFAULTS  environment  variable  in a shell initialization
+                 script that is shared between different computers.  That  way
+                 the  limit  is automatically bigger on systems with more mem-
+                 ory.  Example: --memlimit-compress=70%
+
+              o  The limit can be reset back to its default value  by  setting
+                 it  to  0.  This is currently equivalent to setting the limit
+                 to max (no memory usage limit).
+
+              For 32-bit xz there is a special case: if  the  limit  would  be
+              over 4020 MiB, the limit is set to 4020 MiB.  On MIPS32 2000 MiB
+              is used instead.  (The values 0 and max aren't affected by this.
+              A similar feature doesn't exist for decompression.)  This can be
+              helpful when a 32-bit executable has  access  to  4 GiB  address
+              space  (2  GiB on MIPS32) while hopefully doing no harm in other
+              situations.
+
+              See also the section Memory usage.
+
+       --memlimit-decompress=limit
+              Set a memory usage limit for decompression.  This  also  affects
+              the  --list  mode.  If the operation is not possible without ex-
+              ceeding the limit, xz will display an  error  and  decompressing
+              the  file will fail.  See --memlimit-compress=limit for possible
+              ways to specify the limit.
+
+       --memlimit-mt-decompress=limit
+              Set a memory usage limit for multi-threaded decompression.  This
+              can  only  affect the number of threads; this will never make xz
+              refuse to decompress a file.  If limit is too low to  allow  any
+              multi-threading,  the  limit  is ignored and xz will continue in
+              single-threaded mode.  Note that if --memlimit-decompress is
+              also used, it will always apply to both single-threaded and
+              multi-threaded modes, and so the effective limit for
+              multi-threading will never be higher than the limit set with
+              --memlimit-decompress.
+
+              In contrast to the other  memory  usage  limit  options,  --mem-
+              limit-mt-decompress=limit  has  a system-specific default limit.
+              xz --info-memory can be used to see the current value.
+
+              This option and its default  value  exist  because  without  any
+              limit  the  threaded decompressor could end up allocating an in-
+              sane amount of memory with some input  files.   If  the  default
+              limit is too low on your system, feel free to increase the limit
+              but never set it to a value larger than the amount of usable RAM
+              as  with  appropriate  input  files  xz will attempt to use that
+              amount of memory even with a low number of threads.  Running out
+              of  memory  or  swapping  will not improve decompression perfor-
+              mance.
+
+              See --memlimit-compress=limit for possible ways to  specify  the
+              limit.   Setting limit to 0 resets the limit to the default sys-
+              tem-specific value.
+
+
+
+       -M limit, --memlimit=limit, --memory=limit
+              This  is  equivalent  to  specifying   --memlimit-compress=limit
+              --memlimit-decompress=limit --memlimit-mt-decompress=limit.
+
+       --no-adjust
+              Display  an  error  and exit if the memory usage limit cannot be
+              met without adjusting settings that affect the  compressed  out-
+              put.   That is, this prevents xz from switching the encoder from
+              multi-threaded mode to single-threaded mode  and  from  reducing
+              the  LZMA2  dictionary  size.  Even when this option is used the
+              number of threads may be reduced to meet the memory usage  limit
+              as that won't affect the compressed output.
+
+              Automatic adjusting is always disabled when creating raw streams
+              (--format=raw).
+
+       -T threads, --threads=threads
+              Specify the number of worker threads to use.  Setting threads to
+              a special value 0 makes xz use up to as many threads as the pro-
+              cessor(s) on the system support.  The actual number  of  threads
+              can  be  fewer  than threads if the input file is not big enough
+              for threading with the given settings or if using  more  threads
+              would exceed the memory usage limit.
+
+              The  single-threaded and multi-threaded compressors produce dif-
+              ferent output.  Single-threaded compressor will give the  small-
+              est  file  size but only the output from the multi-threaded com-
+              pressor can be decompressed  using  multiple  threads.   Setting
+              threads to 1 will use the single-threaded mode.  Setting threads
+              to any other value, including 0,  will  use  the  multi-threaded
+              compressor even if the system supports only one hardware thread.
+              (xz 5.2.x used single-threaded mode in this situation.)
+
+              To use multi-threaded mode with only one thread, set threads  to
+              +1.   The  +  prefix  has no effect with values other than 1.  A
+              memory usage limit can still make xz switch  to  single-threaded
+              mode  unless  --no-adjust is used.  Support for the + prefix was
+              added in xz 5.4.0.
+
+              If an automatic number of threads has been requested and no mem-
+              ory  usage  limit has been specified, then a system-specific de-
+              fault soft limit will be used to possibly limit  the  number  of
+              threads.  It is a soft limit in the sense that it is ignored if
+              the number of threads becomes one, thus a soft limit will never
+              stop xz from compressing or decompressing.  This default soft
+              limit will not make xz switch from multi-threaded mode to
+              single-threaded mode.  The active limits can be seen with
+              xz --info-memory.
+
+              Currently the only threading method is to split the  input  into
+              blocks and compress them independently from each other.  The de-
+              fault block size depends on the compression  level  and  can  be
+              overridden with the --block-size=size option.
+
+              Threaded decompression only works on files that contain multiple
+              blocks with size information in block headers.  All large enough
+              files compressed in multi-threaded mode meet this condition, but
+              files  compressed  in  single-threaded  mode   don't   even   if
+              --block-size=size has been used.
+
+   Custom compressor filter chains
+       A custom filter chain allows specifying the compression settings in de-
+       tail instead of relying on the  settings  associated  to  the  presets.
+       When  a custom filter chain is specified, preset options (-0 ... -9 and
+       --extreme) earlier on the command line are forgotten.  If a preset  op-
+       tion  is  specified  after one or more custom filter chain options, the
+       new preset takes effect and the custom filter chain  options  specified
+       earlier are forgotten.
+
+       A  filter chain is comparable to piping on the command line.  When com-
+       pressing, the uncompressed input goes to the first filter, whose output
+       goes  to  the next filter (if any).  The output of the last filter gets
+       written to the compressed file.  The maximum number of filters  in  the
+       chain  is  four,  but typically a filter chain has only one or two fil-
+       ters.
+
+       Many filters have limitations on where they can be in the filter chain:
+       some  filters  can work only as the last filter in the chain, some only
+       as a non-last filter, and some work in any position in the chain.   De-
+       pending on the filter, this limitation is either inherent to the filter
+       design or exists to prevent security issues.
+
+       A custom filter chain is specified by using one or more filter  options
+       in  the  order they are wanted in the filter chain.  That is, the order
+       of filter options is significant!  When decoding  raw  streams  (--for-
+       mat=raw),  the  filter  chain  is specified in the same order as it was
+       specified when compressing.
+
+       Filters take filter-specific options as a comma-separated list.   Extra
+       commas  in  options  are ignored.  Every option has a default value, so
+       you need to specify only those you want to change.
+
+       To see the whole filter chain and options, use xz  -vv  (that  is,  use
+       --verbose twice).  This works also for viewing the filter chain options
+       used by presets.
+
+       --lzma1[=options]
+       --lzma2[=options]
+              Add LZMA1 or LZMA2 filter to the filter  chain.   These  filters
+              can be used only as the last filter in the chain.
+
+              LZMA1  is  a legacy filter, which is supported almost solely due
+              to the legacy .lzma file  format,  which  supports  only  LZMA1.
+              LZMA2  is  an updated version of LZMA1 to fix some practical is-
+              sues of LZMA1.  The .xz format uses LZMA2  and  doesn't  support
+              LZMA1  at  all.  Compression speed and ratios of LZMA1 and LZMA2
+              are practically the same.
+
+              LZMA1 and LZMA2 share the same set of options:
+
+              preset=preset
+                     Reset all LZMA1 or LZMA2 options to preset.  Preset  con-
+                     sists of an integer, which may be followed by single-let-
+                     ter preset modifiers.  The integer can be from  0  to  9,
+                     matching  the  command  line options -0 ... -9.  The only
+                     supported modifier is currently e,  which  matches  --ex-
+                     treme.   If no preset is specified, the default values of
+                     LZMA1 or LZMA2 options are taken from the preset 6.
+
+              dict=size
+                     Dictionary (history buffer) size indicates how many bytes
+                     of  the  recently  processed uncompressed data is kept in
+                     memory.  The algorithm tries to find repeating  byte  se-
+                     quences  (matches)  in the uncompressed data, and replace
+                     them with references to the data currently in the dictio-
+                     nary.   The  bigger  the  dictionary,  the  higher is the
+                     chance to find a match.  Thus, increasing dictionary size
+                     usually improves compression ratio, but a dictionary big-
+                     ger than the uncompressed file is a waste of memory.
+
+                     Typical dictionary size is from 64 KiB  to  64 MiB.   The
+                     minimum  is  4 KiB.   The maximum for compression is cur-
+                     rently 1.5 GiB (1536 MiB).  The decompressor already sup-
+                     ports  dictionaries up to one byte less than 4 GiB, which
+                     is the maximum for the LZMA1 and LZMA2 stream formats.
+
+                     Dictionary size and match finder (mf) together  determine
+                     the memory usage of the LZMA1 or LZMA2 encoder.  The same
+                     (or bigger) dictionary size is required for decompressing
+                     that  was used when compressing, thus the memory usage of
+                     the decoder is determined by  the  dictionary  size  used
+                     when  compressing.   The .xz headers store the dictionary
+                     size either as 2^n or 2^n + 2^(n-1), so these  sizes  are
+                     somewhat preferred for compression.  Other sizes will get
+                     rounded up when stored in the .xz headers.
+
+              lc=lc  Specify the number of literal context bits.  The  minimum
+                     is  0  and  the maximum is 4; the default is 3.  In addi-
+                     tion, the sum of lc and lp must not exceed 4.
+
+                     All bytes that cannot be encoded as matches  are  encoded
+                     as  literals.   That  is, literals are simply 8-bit bytes
+                     that are encoded one at a time.
+
+                     The literal coding makes an assumption that  the  highest
+                     lc  bits of the previous uncompressed byte correlate with
+                     the next byte.  For example, in typical English text,  an
+                     upper-case  letter is often followed by a lower-case let-
+                     ter, and a lower-case letter is usually followed  by  an-
+                     other  lower-case letter.  In the US-ASCII character set,
+                     the highest three bits are 010 for upper-case letters and
+                     011  for  lower-case letters.  When lc is at least 3, the
+                     literal coding can take advantage of this property in the
+                     uncompressed data.
+
+                     The default value (3) is usually good.  If you want maxi-
+                     mum compression, test lc=4.  Sometimes it helps a little,
+                     and sometimes it makes compression worse.  If it makes it
+                     worse, test lc=2 too.
+
+              lp=lp  Specify the number of literal position bits.  The minimum
+                     is 0 and the maximum is 4; the default is 0.
+
+                     Lp  affects  what  kind  of alignment in the uncompressed
+                     data is assumed when encoding literals.  See pb below for
+                     more information about alignment.
+
+              pb=pb  Specify  the  number  of position bits.  The minimum is 0
+                     and the maximum is 4; the default is 2.
+
+                     Pb affects what kind of  alignment  in  the  uncompressed
+                     data  is assumed in general.  The default means four-byte
+                     alignment (2^pb=2^2=4), which is often a good choice when
+                     there's no better guess.
+
+                     When  the  alignment is known, setting pb accordingly may
+                     reduce the file size a little.  For  example,  with  text
+                     files  having  one-byte  alignment (US-ASCII, ISO-8859-*,
+                     UTF-8), setting pb=0 can  improve  compression  slightly.
+                     For UTF-16 text, pb=1 is a good choice.  If the alignment
+                     is an odd number like 3 bytes, pb=0  might  be  the  best
+                     choice.
+
+                     Even though the assumed alignment can be adjusted with pb
+                     and lp, LZMA1 and  LZMA2  still  slightly  favor  16-byte
+                     alignment.   It  might  be worth taking into account when
+                     designing file formats that are likely to be  often  com-
+                     pressed with LZMA1 or LZMA2.
+
+              mf=mf  Match  finder has a major effect on encoder speed, memory
+                     usage, and compression ratio.  Usually Hash  Chain  match
+                     finders  are  faster than Binary Tree match finders.  The
+                     default depends on the preset: 0 uses hc3, 1-3  use  hc4,
+                     and the rest use bt4.
+
+                     The  following  match  finders are supported.  The memory
+                     usage formulas below are rough approximations, which  are
+                     closest to the reality when dict is a power of two.
+
+                     hc3    Hash Chain with 2- and 3-byte hashing
+                            Minimum value for nice: 3
+                            Memory usage:
+                            dict * 7.5 (if dict <= 16 MiB);
+                            dict * 5.5 + 64 MiB (if dict > 16 MiB)
+
+                     hc4    Hash Chain with 2-, 3-, and 4-byte hashing
+                            Minimum value for nice: 4
+                            Memory usage:
+                            dict * 7.5 (if dict <= 32 MiB);
+                            dict * 6.5 (if dict > 32 MiB)
+
+                     bt2    Binary Tree with 2-byte hashing
+                            Minimum value for nice: 2
+                            Memory usage: dict * 9.5
+
+                     bt3    Binary Tree with 2- and 3-byte hashing
+                            Minimum value for nice: 3
+                            Memory usage:
+                            dict * 11.5 (if dict <= 16 MiB);
+                            dict * 9.5 + 64 MiB (if dict > 16 MiB)
+
+                     bt4    Binary Tree with 2-, 3-, and 4-byte hashing
+                            Minimum value for nice: 4
+                            Memory usage:
+                            dict * 11.5 (if dict <= 32 MiB);
+                            dict * 10.5 (if dict > 32 MiB)
+
+              mode=mode
+                     Compression mode specifies the method to analyze the data
+                     produced by the match finder.  Supported modes  are  fast
+                     and normal.  The default is fast for presets 0-3 and nor-
+                     mal for presets 4-9.
+
+                     Usually fast is used with Hash Chain  match  finders  and
+                     normal with Binary Tree match finders.  This is also what
+                     the presets do.
+
+              nice=nice
+                     Specify what is considered to be  a  nice  length  for  a
+                     match.  Once a match of at least nice bytes is found, the
+                     algorithm stops looking for possibly better matches.
+
+                     Nice can be 2-273 bytes.  Higher values tend to give bet-
+                     ter  compression  ratio at the expense of speed.  The de-
+                     fault depends on the preset.
+
+              depth=depth
+                     Specify the maximum search depth  in  the  match  finder.
+                     The  default  is  the special value of 0, which makes the
+                     compressor determine a reasonable depth from mf and nice.
+
+                     Reasonable depth for Hash Chains is 4-100 and 16-1000 for
+                     Binary  Trees.  Using very high values for depth can make
+                     the encoder extremely slow with some files.   Avoid  set-
+                     ting  the  depth over 1000 unless you are prepared to in-
+                     terrupt the compression in case  it  is  taking  far  too
+                     long.
+
+              When  decoding  raw streams (--format=raw), LZMA2 needs only the
+              dictionary size.  LZMA1 needs also lc, lp, and pb.
+
+       --x86[=options]
+       --arm[=options]
+       --armthumb[=options]
+       --arm64[=options]
+       --powerpc[=options]
+       --ia64[=options]
+       --sparc[=options]
+              Add a branch/call/jump (BCJ) filter to the filter chain.   These
+              filters  can  be  used  only  as a non-last filter in the filter
+              chain.
+
+              A BCJ filter converts relative addresses in the machine code  to
+              their  absolute  counterparts.   This doesn't change the size of
+              the data but it increases redundancy, which can  help  LZMA2  to
+              produce a 0-15 % smaller .xz file.  The BCJ filters are always
+              reversible, so using a BCJ filter for the wrong type of data
+              doesn't cause any data loss, although it may make the
+              compression ratio slightly worse.  The BCJ filters are very fast
+              and use an insignificant amount of memory.
+
+              These BCJ filters have known problems related to the compression
+              ratio:
+
+              o  Some types of files containing executable code (for  example,
+                 object  files,  static  libraries,  and Linux kernel modules)
+                 have the addresses in the  instructions  filled  with  filler
+                 values.   These BCJ filters will still do the address conver-
+                 sion, which will make the compression worse with these files.
+
+              o  If a BCJ filter is applied on an archive, it is possible that
+                 it  makes  the  compression  ratio worse than not using a BCJ
+                 filter.  For example, if there are similar or even  identical
+                 executables  then  filtering  will likely make the files less
+                 similar and thus compression is worse.  The contents of  non-
+                 executable  files  in  the  same  archive can matter too.  In
+                 practice one has to try with and without a BCJ filter to  see
+                 which is better in each situation.
+
+              Different  instruction  sets  have different alignment: the exe-
+              cutable file must be aligned to a multiple of this value in  the
+              input data to make the filter work.
+
+                     Filter      Alignment   Notes
+                     x86             1       32-bit or 64-bit x86
+                     ARM             4
+                     ARM-Thumb       2
+                     ARM64           4       4096-byte alignment is best
+                     PowerPC         4       Big endian only
+                     IA-64          16       Itanium
+                     SPARC           4
+
+              Since  the  BCJ-filtered  data is usually compressed with LZMA2,
+              the compression ratio may be improved slightly if the LZMA2  op-
+              tions are set to match the alignment of the selected BCJ filter.
+              For example, with the IA-64 filter, it's good  to  set  pb=4  or
+              even  pb=4,lp=4,lc=0  with LZMA2 (2^4=16).  The x86 filter is an
+              exception; it's usually good to stick to LZMA2's  default  four-
+              byte alignment when compressing x86 executables.
+
+              All BCJ filters support the same options:
+
+              start=offset
+                     Specify the start offset that is used when converting be-
+                     tween relative and absolute addresses.  The  offset  must
+                     be a multiple of the alignment of the filter (see the ta-
+                     ble above).  The default is zero.  In practice,  the  de-
+                     fault is good; specifying a custom offset is almost never
+                     useful.
+
+       --delta[=options]
+              Add the Delta filter to the filter chain.  The Delta filter  can
+              only be used as a non-last filter in the filter chain.
+
+              Currently  only simple byte-wise delta calculation is supported.
+              It can be useful when  compressing,  for  example,  uncompressed
+              bitmap  images or uncompressed PCM audio.  However, special pur-
+              pose algorithms may give significantly better results than Delta
+              +  LZMA2.   This is true especially with audio, which compresses
+              faster and better, for example, with flac(1).
+
+              Supported options:
+
+              dist=distance
+                     Specify the distance of the delta calculation  in  bytes.
+                     distance must be 1-256.  The default is 1.
+
+                     For example, with dist=2 and eight-byte input A1 B1 A2 B3
+                     A3 B5 A4 B7, the output will be A1 B1 01 02 01 02 01 02.
+
+   Other options
+       -q, --quiet
+              Suppress warnings and notices.  Specify this twice  to  suppress
+              errors too.  This option has no effect on the exit status.  That
+              is, even if a warning was suppressed, the exit status  to  indi-
+              cate a warning is still used.
+
+       -v, --verbose
+              Be  verbose.   If  standard error is connected to a terminal, xz
+              will display a progress indicator.  Specifying  --verbose  twice
+              will give even more verbose output.
+
+              The progress indicator shows the following information:
+
+              o  Completion  percentage is shown if the size of the input file
+                 is known.  That is, the percentage cannot be shown in pipes.
+
+              o  Amount of compressed data produced (compressing) or  consumed
+                 (decompressing).
+
+              o  Amount  of  uncompressed  data consumed (compressing) or pro-
+                 duced (decompressing).
+
+              o  Compression ratio, which is calculated by dividing the amount
+                 of  compressed  data processed so far by the amount of uncom-
+                 pressed data processed so far.
+
+              o  Compression or decompression speed.  This is measured as  the
+                 amount  of  uncompressed  data consumed (compression) or pro-
+                 duced (decompression) per second.  It is shown  after  a  few
+                 seconds have passed since xz started processing the file.
+
+              o  Elapsed time in the format M:SS or H:MM:SS.
+
+              o  Estimated  remaining  time is shown only when the size of the
+                 input file is known and a  couple  of  seconds  have  already
+                 passed  since  xz  started  processing the file.  The time is
+                 shown in a less precise format which never  has  any  colons,
+                 for example, 2 min 30 s.
+
+              When  standard  error  is not a terminal, --verbose will make xz
+              print the filename, compressed size, uncompressed size, compres-
+              sion  ratio,  and  possibly also the speed and elapsed time on a
+              single line to standard error after compressing or decompressing
+              the file.  The speed and elapsed time are included only when the
+              operation took at least a few seconds.  If the operation  didn't
+              finish,  for example, due to user interruption, also the comple-
+              tion percentage is printed if the size  of  the  input  file  is
+              known.
+
+       -Q, --no-warn
+              Don't set the exit status to 2 even if a condition worth a warn-
+              ing was detected.  This  option  doesn't  affect  the  verbosity
+              level,  thus  both  --quiet and --no-warn have to be used to not
+              display warnings and to not alter the exit status.
+
+       --robot
+              Print messages in a machine-parsable format.  This  is  intended
+              to  ease  writing  frontends  that want to use xz instead of li-
+              blzma, which may be the case with various scripts.   The  output
+              with  this  option  enabled  is meant to be stable across xz re-
+              leases.  See the section ROBOT MODE for details.
+
+       --info-memory
+              Display, in human-readable  format,  how  much  physical  memory
+              (RAM)  and  how  many processor threads xz thinks the system has
+              and the memory usage limits for compression  and  decompression,
+              and exit successfully.
+
+       -h, --help
+              Display  a  help  message  describing the most commonly used op-
+              tions, and exit successfully.
+
+       -H, --long-help
+              Display a help message describing all features of xz,  and  exit
+              successfully.
+
+       -V, --version
+              Display  the  version number of xz and liblzma in human readable
+              format.  To get machine-parsable output, specify --robot  before
+              --version.
+
+ROBOT MODE
+       The robot mode is activated with the --robot option.  It makes the out-
+       put of xz easier to parse by other programs.  Currently --robot is sup-
+       ported  only  together  with  --version, --info-memory, and --list.  It
+       will be supported for compression and decompression in the future.
+
+   Version
+       xz --robot --version will print the version number of xz and liblzma in
+       the following format:
+
+       XZ_VERSION=XYYYZZZS
+       LIBLZMA_VERSION=XYYYZZZS
+
+       X      Major version.
+
+       YYY    Minor  version.  Even numbers are stable.  Odd numbers are alpha
+              or beta versions.
+
+       ZZZ    Patch level for stable releases or just a counter  for  develop-
+              ment releases.
+
+       S      Stability.  0 is alpha, 1 is beta, and 2 is stable.  S should
+              always be 2 when YYY is even.
+
+       XYYYZZZS are the same on both lines if xz and liblzma are from the same
+       XZ Utils release.
+
+       Examples: 4.999.9beta is 49990091 and 5.0.0 is 50000002.
+
+   Memory limit information
+       xz --robot --info-memory prints a single line with multiple
+       tab-separated columns:
+
+       1.  Total amount of physical memory (RAM) in bytes.
+
+       2.  Memory usage limit for compression in bytes  (--memlimit-compress).
+           A  special  value of 0 indicates the default setting which for sin-
+           gle-threaded mode is the same as no limit.
+
+       3.  Memory usage limit for decompression  in  bytes  (--memlimit-decom-
+           press).   A  special value of 0 indicates the default setting which
+           for single-threaded mode is the same as no limit.
+
+       4.  Since xz 5.3.4alpha: Memory usage limit for multi-threaded
+           decompression in bytes (--memlimit-mt-decompress).  This is never
+           zero because a system-specific default value shown in column 5 is
+           used if no limit has been specified explicitly.  This is also
+           never greater than the value in column 3 even if a larger value
+           has been specified with --memlimit-mt-decompress.
+
+       5.  Since  xz  5.3.4alpha: A system-specific default memory usage limit
+           that is used to limit the number of threads when  compressing  with
+           an  automatic  number  of threads (--threads=0) and no memory usage
+           limit has been specified (--memlimit-compress).  This is also  used
+           as the default value for --memlimit-mt-decompress.
+
+       6.  Since xz 5.3.4alpha: Number of available processor threads.
+
+       In  the  future,  the  output of xz --robot --info-memory may have more
+       columns, but never more than a single line.
+
+   List mode
+       xz --robot --list uses tab-separated output.  The first column of every
+       line  has  a string that indicates the type of the information found on
+       that line:
+
+       name   This is always the first line when starting to list a file.  The
+              second column on the line is the filename.
+
+       file   This line contains overall information about the .xz file.  This
+              line is always printed after the name line.
+
+       stream This line type is used only when --verbose was specified.  There
+              are as many stream lines as there are streams in the .xz file.
+
+       block  This line type is used only when --verbose was specified.  There
+              are as many block lines as there are blocks  in  the  .xz  file.
+              The  block lines are shown after all the stream lines; different
+              line types are not interleaved.
+
+       summary
+              This line type is used only when --verbose was specified  twice.
+              This line is printed after all block lines.  Like the file line,
+              the summary line contains  overall  information  about  the  .xz
+              file.
+
+       totals This  line  is always the very last line of the list output.  It
+              shows the total counts and sizes.
+
+       The columns of the file lines:
+              2.  Number of streams in the file
+              3.  Total number of blocks in the stream(s)
+              4.  Compressed size of the file
+              5.  Uncompressed size of the file
+              6.  Compression ratio, for example, 0.123.   If  ratio  is  over
+                  9.999,  three  dashes (---) are displayed instead of the ra-
+                  tio.
+              7.  Comma-separated list of integrity check names.  The  follow-
+                  ing strings are used for the known check types: None, CRC32,
+                  CRC64, and SHA-256.  For unknown check types,  Unknown-N  is
+                  used,  where  N  is the Check ID as a decimal number (one or
+                  two digits).
+              8.  Total size of stream padding in the file
+
+       The columns of the stream lines:
+              2.  Stream number (the first stream is 1)
+              3.  Number of blocks in the stream
+              4.  Compressed start offset
+              5.  Uncompressed start offset
+              6.  Compressed size (does not include stream padding)
+              7.  Uncompressed size
+              8.  Compression ratio
+              9.  Name of the integrity check
+              10. Size of stream padding
+
+       The columns of the block lines:
+              2.  Number of the stream containing this block
+              3.  Block number relative to the beginning of  the  stream  (the
+                  first block is 1)
+              4.  Block number relative to the beginning of the file
+              5.  Compressed  start  offset  relative  to the beginning of the
+                  file
+              6.  Uncompressed start offset relative to the beginning  of  the
+                  file
+              7.  Total compressed size of the block (includes headers)
+              8.  Uncompressed size
+              9.  Compression ratio
+              10. Name of the integrity check
+
+       If  --verbose  was  specified twice, additional columns are included on
+       the block lines.  These are not displayed with a single --verbose,  be-
+       cause  getting  this  information  requires  many seeks and can thus be
+       slow:
+              11. Value of the integrity check in hexadecimal
+              12. Block header size
+              13. Block flags: c indicates that compressed  size  is  present,
+                  and  u  indicates that uncompressed size is present.  If the
+                  flag is not set, a dash (-) is shown  instead  to  keep  the
+                  string  length  fixed.  New flags may be added to the end of
+                  the string in the future.
+              14. Size of the actual compressed data in the  block  (this  ex-
+                  cludes the block header, block padding, and check fields)
+              15. Amount  of  memory  (in  bytes)  required to decompress this
+                  block with this xz version
+              16. Filter chain.  Note that most of the options  used  at  com-
+                  pression time cannot be known, because only the options that
+                  are needed for decompression are stored in the .xz headers.
+
+       The columns of the summary lines:
+              2.  Amount of memory (in bytes) required to decompress this file
+                  with this xz version
+              3.  yes  or  no  indicating  if all block headers have both com-
+                  pressed size and uncompressed size stored in them
+              Since xz 5.1.2alpha:
+              4.  Minimum xz version required to decompress the file
+
+       The columns of the totals line:
+              2.  Number of streams
+              3.  Number of blocks
+              4.  Compressed size
+              5.  Uncompressed size
+              6.  Average compression ratio
+              7.  Comma-separated list of  integrity  check  names  that  were
+                  present in the files
+              8.  Stream padding size
+              9.  Number of files.  This is here to keep the order of the ear-
+                  lier columns the same as on file lines.
+
+       If --verbose was specified twice, additional columns  are  included  on
+       the totals line:
+              10. Maximum  amount  of memory (in bytes) required to decompress
+                  the files with this xz version
+              11. yes or no indicating if all block  headers  have  both  com-
+                  pressed size and uncompressed size stored in them
+              Since xz 5.1.2alpha:
+              12. Minimum xz version required to decompress the file
+
+       Future  versions may add new line types and new columns can be added to
+       the existing line types, but the existing columns won't be changed.
+
+EXIT STATUS
+       0      All is good.
+
+       1      An error occurred.
+
+       2      Something worth a warning occurred, but  no  actual  errors  oc-
+              curred.
+
+       Notices (not warnings or errors) printed on standard error don't affect
+       the exit status.
+
+ENVIRONMENT
+       xz parses space-separated lists of options from the  environment  vari-
+       ables XZ_DEFAULTS and XZ_OPT, in this order, before parsing the options
+       from the command line.  Note that only options are parsed from the  en-
+       vironment  variables; all non-options are silently ignored.  Parsing is
+       done with getopt_long(3) which is used also for the command line  argu-
+       ments.
+
+       XZ_DEFAULTS
+              User-specific or system-wide default options.  Typically this is
+              set in a shell initialization script to enable xz's memory usage
+              limiter  by default.  Excluding shell initialization scripts and
+              similar special cases, scripts must never set  or  unset  XZ_DE-
+              FAULTS.
+
+       XZ_OPT This is for passing options to xz when it is not possible to set
+              the options directly on the xz command line.  This is  the  case
+              when xz is run by a script or tool, for example, GNU tar(1):
+
+                     XZ_OPT=-2v tar caf foo.tar.xz foo
+
+              Scripts  may use XZ_OPT, for example, to set script-specific de-
+              fault compression options.  It is  still  recommended  to  allow
+              users to override XZ_OPT if that is reasonable.  For example, in
+              sh(1) scripts one may use something like this:
+
+                     XZ_OPT=${XZ_OPT-"-7e"}
+                     export XZ_OPT
+
+LZMA UTILS COMPATIBILITY
+       The command line syntax of xz is practically a superset  of  lzma,  un-
+       lzma,  and lzcat as found from LZMA Utils 4.32.x.  In most cases, it is
+       possible to replace LZMA Utils with XZ Utils without breaking  existing
+       scripts.   There are some incompatibilities though, which may sometimes
+       cause problems.
+
+   Compression preset levels
+       The numbering of the compression level presets is not identical  in  xz
+       and  LZMA Utils.  The most important difference is how dictionary sizes
+       are mapped to different presets.  Dictionary size is roughly  equal  to
+       the decompressor memory usage.
+
+              Level     xz      LZMA Utils
+               -0     256 KiB      N/A
+               -1       1 MiB     64 KiB
+               -2       2 MiB      1 MiB
+               -3       4 MiB    512 KiB
+               -4       4 MiB      1 MiB
+               -5       8 MiB      2 MiB
+               -6       8 MiB      4 MiB
+               -7      16 MiB      8 MiB
+               -8      32 MiB     16 MiB
+               -9      64 MiB     32 MiB
+
+       The dictionary size differences affect the compressor memory usage too,
+       but there are some other differences between LZMA Utils and  XZ  Utils,
+       which make the difference even bigger:
+
+              Level     xz      LZMA Utils 4.32.x
+               -0       3 MiB          N/A
+               -1       9 MiB          2 MiB
+               -2      17 MiB         12 MiB
+               -3      32 MiB         12 MiB
+               -4      48 MiB         16 MiB
+               -5      94 MiB         26 MiB
+               -6      94 MiB         45 MiB
+               -7     186 MiB         83 MiB
+               -8     370 MiB        159 MiB
+               -9     674 MiB        311 MiB
+
+       The  default  preset  level in LZMA Utils is -7 while in XZ Utils it is
+       -6, so both use an 8 MiB dictionary by default.
+
+   Streamed vs. non-streamed .lzma files
+       The uncompressed size of the file can be stored in  the  .lzma  header.
+       LZMA  Utils  does that when compressing regular files.  The alternative
+       is to mark that uncompressed size is  unknown  and  use  end-of-payload
+       marker to indicate where the decompressor should stop.  LZMA Utils uses
+       this method when uncompressed size isn't known, which is the case,  for
+       example, in pipes.
+
+       xz  supports  decompressing  .lzma files with or without end-of-payload
+       marker, but all .lzma files  created  by  xz  will  use  end-of-payload
+       marker  and  have  uncompressed  size  marked  as  unknown in the .lzma
+       header.  This may be a problem in some uncommon situations.  For  exam-
+       ple,  a  .lzma  decompressor in an embedded device might work only with
+       files that have known uncompressed size.  If you hit this problem,  you
+       need to use LZMA Utils or LZMA SDK to create .lzma files with known un-
+       compressed size.
+
+   Unsupported .lzma files
+       The .lzma format allows lc values up to 8, and lp values up to 4.  LZMA
+       Utils can decompress files with any lc and lp, but always creates files
+       with lc=3 and lp=0.  Creating files with other lc and  lp  is  possible
+       with xz and with LZMA SDK.
+
+       The implementation of the LZMA1 filter in liblzma requires that the sum
+       of lc and lp must not exceed 4.  Thus, .lzma files, which  exceed  this
+       limitation, cannot be decompressed with xz.
+
+       LZMA Utils creates only .lzma files which have a dictionary size of 2^n
+       (a power of 2) but accepts files with any dictionary size.  liblzma ac-
+       cepts  only  .lzma  files  which have a dictionary size of 2^n or 2^n +
+       2^(n-1).  This is to decrease  false  positives  when  detecting  .lzma
+       files.
+
+       These limitations shouldn't be a problem in practice, since practically
+       all .lzma files have been compressed with settings  that  liblzma  will
+       accept.
+
+   Trailing garbage
+       When  decompressing,  LZMA  Utils silently ignores everything after the
+       first .lzma stream.  In most situations, this  is  a  bug.   This  also
+       means  that LZMA Utils doesn't support decompressing concatenated .lzma
+       files.
+
+       If there is data left after the first .lzma stream,  xz  considers  the
+       file to be corrupt unless --single-stream was used.  This may break ob-
+       scure scripts which have assumed that trailing garbage is ignored.
+
+NOTES
+   Compressed output may vary
+       The exact compressed output produced from the same  uncompressed  input
+       file may vary between XZ Utils versions even if compression options are
+       identical.  This is because the encoder can be improved (faster or bet-
+       ter  compression)  without  affecting  the file format.  The output can
+       vary even between different builds of the same  XZ  Utils  version,  if
+       different build options are used.
+
+       The above means that once --rsyncable has been implemented, the result-
+       ing files won't necessarily be rsyncable unless both old and new  files
+       have  been  compressed  with  the same xz version.  This problem can be
+       fixed if a part of the encoder implementation is frozen to keep rsynca-
+       ble output stable across xz versions.
+
+   Embedded .xz decompressors
+       Embedded .xz decompressor implementations like XZ Embedded don't neces-
+       sarily support files created with integrity check types other than none
+       and   crc32.    Since  the  default  is  --check=crc64,  you  must  use
+       --check=none or --check=crc32 when creating files for embedded systems.
+
+       Outside embedded systems, all .xz format decompressors support all  the
+       check  types, or at least are able to decompress the file without veri-
+       fying the integrity check if the particular check is not supported.
+
+       XZ Embedded supports BCJ filters, but only with the default start  off-
+       set.
+
+EXAMPLES
+   Basics
+       Compress  the  file foo into foo.xz using the default compression level
+       (-6), and remove foo if compression is successful:
+
+              xz foo
+
+       Decompress bar.xz into bar and don't remove bar.xz even  if  decompres-
+       sion is successful:
+
+              xz -dk bar.xz
+
+       Create  baz.tar.xz  with the preset -4e (-4 --extreme), which is slower
+       than the default -6, but needs less memory for compression  and  decom-
+       pression (48 MiB and 5 MiB, respectively):
+
+              tar cf - baz | xz -4e > baz.tar.xz
+
+       A mix of compressed and uncompressed files can be decompressed to stan-
+       dard output with a single command:
+
+              xz -dcf a.txt b.txt.xz c.txt d.txt.lzma > abcd.txt
+
+   Parallel compression of many files
+       On GNU and *BSD, find(1) and xargs(1) can be used to  parallelize  com-
+       pression of many files:
+
+              find . -type f \! -name '*.xz' -print0 \
+                  | xargs -0r -P4 -n16 xz -T1
+
+       The  -P  option  to  xargs(1) sets the number of parallel xz processes.
+       The best value for the -n option depends on how many files there are to
+       be  compressed.   If there are only a couple of files, the value should
+       probably be 1; with tens of thousands of files, 100 or even more may be
+       appropriate  to  reduce  the  number of xz processes that xargs(1) will
+       eventually create.
+
+       The option -T1 for xz is there to force it to single-threaded mode, be-
+       cause xargs(1) is used to control the amount of parallelization.
+
+   Robot mode
+       Calculate  how  many  bytes  have been saved in total after compressing
+       multiple files:
+
+              xz --robot --list *.xz | awk '/^totals/{print $5-$4}'
+
+       A script may want to know that it is using new enough xz.  The  follow-
+       ing  sh(1)  script  checks that the version number of the xz tool is at
+       least 5.0.0.  This method is compatible with old beta  versions,  which
+       didn't support the --robot option:
+
+              if ! eval "$(xz --robot --version 2> /dev/null)" ||
+                      [ "$XZ_VERSION" -lt 50000002 ]; then
+                  echo "Your xz is too old."
+              fi
+              unset XZ_VERSION LIBLZMA_VERSION
+
+       Set a memory usage limit for decompression using XZ_OPT, but if a limit
+       has already been set, don't increase it:
+
+              NEWLIM=$((123 << 20))  # 123 MiB
+              OLDLIM=$(xz --robot --info-memory | cut -f3)
+              if [ $OLDLIM -eq 0 -o $OLDLIM -gt $NEWLIM ]; then
+                  XZ_OPT="$XZ_OPT --memlimit-decompress=$NEWLIM"
+                  export XZ_OPT
+              fi
+
+   Custom compressor filter chains
+       The simplest use for custom filter chains is customizing a  LZMA2  pre-
+       set.   This  can  be useful, because the presets cover only a subset of
+       the potentially useful combinations of compression settings.
+
+       The CompCPU columns of the tables from the descriptions of the  options
+       -0  ...  -9  and  --extreme  are useful when customizing LZMA2 presets.
+       Here are the relevant parts collected from those two tables:
+
+              Preset   CompCPU
+               -0         0
+
+               -1         1
+               -2         2
+               -3         3
+               -4         4
+               -5         5
+               -6         6
+               -5e        7
+               -6e        8
+
+       If you know that a file requires a somewhat big dictionary (for example,
+       32 MiB)  to  compress well, but you want to compress it quicker than xz
+       -8 would do, a preset with a low CompCPU value (for example, 1) can  be
+       modified to use a bigger dictionary:
+
+              xz --lzma2=preset=1,dict=32MiB foo.tar
+
+       With  certain  files,  the above command may be faster than xz -6 while
+       compressing significantly better.  However, it must be emphasized  that
+       only some files benefit from a big dictionary while keeping the CompCPU
+       value low.  The most obvious situation, where a big dictionary can help
+       a  lot,  is  an archive containing very similar files of at least a few
+       megabytes each.  The dictionary size has  to  be  significantly  bigger
+       than  any  individual file to allow LZMA2 to take full advantage of the
+       similarities between consecutive files.
+
+       If very high compressor and decompressor memory usage is fine, and  the
+       file  being compressed is at least several hundred megabytes, it may be
+       useful to use an even bigger dictionary than the  64  MiB  that  xz  -9
+       would use:
+
+              xz -vv --lzma2=dict=192MiB big_foo.tar
+
+       Using -vv (--verbose --verbose) like in the above example can be useful
+       to see the memory requirements of the compressor and decompressor.  Re-
+       member that using a dictionary bigger than the size of the uncompressed
+       file is a waste of memory, so the above command isn't useful for  small
+       files.
+
+       Sometimes  the  compression  time  doesn't matter, but the decompressor
+       memory usage has to be kept low, for example, to make  it  possible  to
+       decompress  the file on an embedded system.  The following command uses
+       -6e (-6 --extreme) as a base and sets the dictionary  to  only  64 KiB.
+       The  resulting  file  can  be decompressed with XZ Embedded (that's why
+       there is --check=crc32) using about 100 KiB of memory.
+
+              xz --check=crc32 --lzma2=preset=6e,dict=64KiB foo
+
+       If you want to squeeze out as many bytes  as  possible,  adjusting  the
+       number  of  literal  context bits (lc) and number of position bits (pb)
+       can sometimes help.  Adjusting the number of literal position bits (lp)
+       might help too, but usually lc and pb are more important.  For example,
+       a source code archive contains mostly US-ASCII text, so something  like
+       the following might give slightly (like 0.1 %) smaller file than xz -6e
+       (try also without lc=4):
+
+              xz --lzma2=preset=6e,pb=0,lc=4 source_code.tar
+
+       Using another filter together with LZMA2 can improve  compression  with
+       certain file types.  For example, to compress an x86-32 or x86-64 shared
+       library using the x86 BCJ filter:
+
+              xz --x86 --lzma2 libfoo.so
+
+       Note that the order of the filter options is significant.  If --x86  is
+       specified after --lzma2, xz will give an error, because there cannot be
+       any filter after LZMA2, and also because the x86 BCJ filter  cannot  be
+       used as the last filter in the chain.
+
+       The  Delta filter together with LZMA2 can give good results with bitmap
+       images.  It should usually beat PNG, which has a few more advanced fil-
+       ters than simple delta but uses Deflate for the actual compression.
+
+       The  image  has to be saved in uncompressed format, for example, as un-
+       compressed TIFF.  The distance parameter of the Delta filter is set  to
+       match the number of bytes per pixel in the image.  For example, a 24-bit
+       RGB bitmap needs dist=3, and it is also good to pass pb=0 to  LZMA2  to
+       accommodate the three-byte alignment:
+
+              xz --delta=dist=3 --lzma2=pb=0 foo.tiff
+
+       If  multiple  images  have been put into a single archive (for example,
+       .tar), the Delta filter will work on that too as  long  as  all  images
+       have the same number of bytes per pixel.
+
+SEE ALSO
+       xzdec(1),   xzdiff(1),   xzgrep(1),   xzless(1),   xzmore(1),  gzip(1),
+       bzip2(1), 7z(1)
+
+       XZ Utils: <https://tukaani.org/xz/>
+       XZ Embedded: <https://tukaani.org/xz/embedded.html>
+       LZMA SDK: <http://7-zip.org/sdk.html>
+
+
+
+Tukaani                           2022-12-01                             XZ(1)
diff --git a/doc/man/txt/xzdec.txt b/doc/man/txt/xzdec.txt
new file mode 100644
index 0000000..a914e20
--- /dev/null
+++ b/doc/man/txt/xzdec.txt
@@ -0,0 +1,80 @@
+XZDEC(1)                           XZ Utils                           XZDEC(1)
+
+
+
+NAME
+       xzdec, lzmadec - Small .xz and .lzma decompressors
+
+SYNOPSIS
+       xzdec [option...]  [file...]
+       lzmadec [option...]  [file...]
+
+DESCRIPTION
+       xzdec is a liblzma-based decompression-only tool for .xz (and only .xz)
+       files.  xzdec is intended to work as a drop-in replacement for xz(1) in
+       the  most  common  situations where a script has been written to use xz
+       --decompress --stdout (and possibly a few other commonly used  options)
+       to decompress .xz files.  lzmadec is identical to xzdec except that lz-
+       madec supports .lzma files instead of .xz files.
+
+       To reduce the size of the  executable,  xzdec  doesn't  support  multi-
+       threading  or  localization,  and doesn't read options from XZ_DEFAULTS
+       and XZ_OPT environment variables.  xzdec doesn't support displaying in-
+       termediate progress information: sending SIGINFO to xzdec does nothing,
+       but sending  SIGUSR1  terminates  the  process  instead  of  displaying
+       progress information.
+
+OPTIONS
+       -d, --decompress, --uncompress
+              Ignored for xz(1) compatibility.  xzdec supports only decompres-
+              sion.
+
+       -k, --keep
+              Ignored for xz(1) compatibility.  xzdec never creates or removes
+              any files.
+
+       -c, --stdout, --to-stdout
+              Ignored for xz(1) compatibility.  xzdec always writes the decom-
+              pressed data to standard output.
+
+       -q, --quiet
+              Specifying this once does nothing since xzdec never displays any
+              warnings or notices.  Specify this twice to suppress errors.
+
+       -Q, --no-warn
+              Ignored for xz(1) compatibility.  xzdec never uses the exit sta-
+              tus 2.
+
+       -h, --help
+              Display a help message and exit successfully.
+
+       -V, --version
+              Display the version number of xzdec and liblzma.
+
+EXIT STATUS
+       0      All was good.
+
+       1      An error occurred.
+
+       xzdec doesn't have any warning messages like xz(1) has, thus  the  exit
+       status 2 is not used by xzdec.
+
+NOTES
+       Use  xz(1)  instead of xzdec or lzmadec for normal everyday use.  xzdec
+       or lzmadec are meant only for situations where it is important to  have
+       a smaller decompressor than the full-featured xz(1).
+
+       xzdec  and  lzmadec are not really that small.  The size can be reduced
+       further by dropping features from liblzma at  compile  time,  but  that
+       shouldn't  usually  be done for executables distributed in typical non-
+       embedded operating system distributions.  If you need a truly small .xz
+       decompressor, consider using XZ Embedded.
+
+SEE ALSO
+       xz(1)
+
+       XZ Embedded: <https://tukaani.org/xz/embedded.html>
+
+
+
+Tukaani                           2017-04-19                          XZDEC(1)
diff --git a/doc/man/txt/xzdiff.txt b/doc/man/txt/xzdiff.txt
new file mode 100644
index 0000000..681b00c
--- /dev/null
+++ b/doc/man/txt/xzdiff.txt
@@ -0,0 +1,37 @@
+XZDIFF(1)                          XZ Utils                          XZDIFF(1)
+
+
+
+NAME
+       xzcmp, xzdiff, lzcmp, lzdiff - compare compressed files
+
+SYNOPSIS
+       xzcmp [cmp_options] file1 [file2]
+       xzdiff [diff_options] file1 [file2]
+       lzcmp [cmp_options] file1 [file2]
+       lzdiff [diff_options] file1 [file2]
+
+DESCRIPTION
+       xzcmp  and  xzdiff  invoke  cmp(1)  or diff(1) on files compressed with
+       xz(1), lzma(1), gzip(1), bzip2(1), lzop(1), or  zstd(1).   All  options
+       specified  are  passed directly to cmp(1) or diff(1).  If only one file
+       is specified, then the files compared are file1 (which must have a suf-
+       fix  of  a  supported compression format) and file1 from which the com-
+       pression format suffix has been stripped.  If two files are  specified,
+       then  they  are uncompressed if necessary and fed to cmp(1) or diff(1).
+       The exit status from cmp(1) or diff(1) is preserved unless a decompres-
+       sion error occurs; then the exit status is 2.
+
+       The names lzcmp and lzdiff are provided for backward compatibility with
+       LZMA Utils.
+
+SEE ALSO
+       cmp(1), diff(1), xz(1), gzip(1), bzip2(1), lzop(1), zstd(1), zdiff(1)
+
+BUGS
+       Messages from the cmp(1) or diff(1) programs refer to  temporary  file-
+       names instead of those specified.
+
+
+
+Tukaani                           2021-06-04                         XZDIFF(1)
diff --git a/doc/man/txt/xzgrep.txt b/doc/man/txt/xzgrep.txt
new file mode 100644
index 0000000..596520c
--- /dev/null
+++ b/doc/man/txt/xzgrep.txt
@@ -0,0 +1,49 @@
+XZGREP(1)                          XZ Utils                          XZGREP(1)
+
+
+
+NAME
+       xzgrep - search compressed files for a regular expression
+
+SYNOPSIS
+       xzgrep [grep_options] [-e] pattern [file...]
+       xzegrep ...
+       xzfgrep ...
+       lzgrep ...
+       lzegrep ...
+       lzfgrep ...
+
+DESCRIPTION
+       xzgrep  invokes  grep(1)  on  files which may be either uncompressed or
+       compressed with xz(1), lzma(1), gzip(1), bzip2(1), lzop(1), or zstd(1).
+       All options specified are passed directly to grep(1).
+
+       If  no file is specified, then standard input is decompressed if neces-
+       sary and fed to grep(1).  When reading from  standard  input,  gzip(1),
+       bzip2(1), lzop(1), and zstd(1) compressed files are not supported.
+
+       If  xzgrep  is invoked as xzegrep or xzfgrep then grep -E or grep -F is
+       used instead of grep(1).  The same applies to the names lzgrep, lzegrep,
+       and  lzfgrep,  which  are provided for backward compatibility with LZMA
+       Utils.
+
+EXIT STATUS
+       0      At least one match was found from at  least  one  of  the  input
+              files.  No errors occurred.
+
+       1      No  matches  were  found from any of the input files.  No errors
+              occurred.
+
+       >1     One or more errors occurred.  It  is  unknown  if  matches  were
+              found.
+
+ENVIRONMENT
+       GREP   If  the GREP environment variable is set, xzgrep uses it instead
+              of grep(1), grep -E, or grep -F.
+
+SEE ALSO
+       grep(1), xz(1), gzip(1), bzip2(1), lzop(1), zstd(1), zgrep(1)
+
+
+
+Tukaani                           2022-07-19                         XZGREP(1)
diff --git a/doc/man/txt/xzless.txt b/doc/man/txt/xzless.txt
new file mode 100644
index 0000000..5c14c80
--- /dev/null
+++ b/doc/man/txt/xzless.txt
@@ -0,0 +1,39 @@
+XZLESS(1)                          XZ Utils                          XZLESS(1)
+
+
+
+NAME
+       xzless, lzless - view xz or lzma compressed (text) files
+
+SYNOPSIS
+       xzless [file...]
+       lzless [file...]
+
+DESCRIPTION
+       xzless is a filter that displays text from compressed files to a termi-
+       nal.  It works on files compressed with xz(1) or lzma(1).  If no  files
+       are given, xzless reads from standard input.
+
+       xzless  uses  less(1) to present its output.  Unlike xzmore, its choice
+       of pager cannot be altered by setting an  environment  variable.   Com-
+       mands  are  based  on  both  more(1) and vi(1) and allow back and forth
+       movement and searching.  See the less(1) manual for more information.
+
+       The command named lzless is provided for  backward  compatibility  with
+       LZMA Utils.
+
+ENVIRONMENT
+       LESSMETACHARS
+              A list of characters special to the shell.  Set by xzless unless
+              it is already set in the environment.
+
+       LESSOPEN
+              Set to a command line to invoke the xz(1) decompressor for  pre-
+              processing the input files to less(1).
+
+SEE ALSO
+       less(1), xz(1), xzmore(1), zless(1)
+
+
+
+Tukaani                           2010-09-27                         XZLESS(1)
diff --git a/doc/man/txt/xzmore.txt b/doc/man/txt/xzmore.txt
new file mode 100644
index 0000000..5a9d86c
--- /dev/null
+++ b/doc/man/txt/xzmore.txt
@@ -0,0 +1,34 @@
+XZMORE(1)                          XZ Utils                          XZMORE(1)
+
+
+
+NAME
+       xzmore, lzmore - view xz or lzma compressed (text) files
+
+SYNOPSIS
+       xzmore [file...]
+       lzmore [file...]
+
+DESCRIPTION
+       xzmore  is  a  filter which allows examination of xz(1) or lzma(1) com-
+       pressed text files one screenful at a time on a soft-copy terminal.
+
+       To use a pager other than the default more,  set  environment  variable
+       PAGER  to the name of the desired program.  The name lzmore is provided
+       for backward compatibility with LZMA Utils.
+
+       e or q When the prompt --More--(Next file: file) is printed, this  com-
+              mand causes xzmore to exit.
+
+       s      When  the prompt --More--(Next file: file) is printed, this com-
+              mand causes xzmore to skip the next file and continue.
+
+       For a list of keyboard commands supported while actually viewing the con-
+       tent of a file, refer to the manual of the pager you use, usually more(1).
+
+SEE ALSO
+       more(1), xz(1), xzless(1), zmore(1)
+
+
+
+Tukaani                           2013-06-30                         XZMORE(1)
diff --git a/doc/xz-file-format.txt b/doc/xz-file-format.txt
new file mode 100644
index 0000000..09c83e0
--- /dev/null
+++ b/doc/xz-file-format.txt
@@ -0,0 +1,1165 @@
+
+The .xz File Format
+===================
+
+Version 1.1.0 (2022-12-11)
+
+
+        0. Preface
+           0.1. Notices and Acknowledgements
+           0.2. Getting the Latest Version
+           0.3. Version History
+        1. Conventions
+           1.1. Byte and Its Representation
+           1.2. Multibyte Integers
+        2. Overall Structure of .xz File
+           2.1. Stream
+                2.1.1. Stream Header
+                       2.1.1.1. Header Magic Bytes
+                       2.1.1.2. Stream Flags
+                       2.1.1.3. CRC32
+                2.1.2. Stream Footer
+                       2.1.2.1. CRC32
+                       2.1.2.2. Backward Size
+                       2.1.2.3. Stream Flags
+                       2.1.2.4. Footer Magic Bytes
+           2.2. Stream Padding
+        3. Block
+           3.1. Block Header
+                3.1.1. Block Header Size
+                3.1.2. Block Flags
+                3.1.3. Compressed Size
+                3.1.4. Uncompressed Size
+                3.1.5. List of Filter Flags
+                3.1.6. Header Padding
+                3.1.7. CRC32
+           3.2. Compressed Data
+           3.3. Block Padding
+           3.4. Check
+        4. Index
+           4.1. Index Indicator
+           4.2. Number of Records
+           4.3. List of Records
+                4.3.1. Unpadded Size
+                4.3.2. Uncompressed Size
+           4.4. Index Padding
+           4.5. CRC32
+        5. Filter Chains
+           5.1. Alignment
+           5.2. Security
+           5.3. Filters
+                5.3.1. LZMA2
+                5.3.2. Branch/Call/Jump Filters for Executables
+                5.3.3. Delta
+                       5.3.3.1. Format of the Encoded Output
+           5.4. Custom Filter IDs
+                5.4.1. Reserved Custom Filter ID Ranges
+        6. Cyclic Redundancy Checks
+        7. References
+
+
+0. Preface
+
+        This document describes the .xz file format (filename suffix
+        ".xz", MIME type "application/x-xz"). It is intended that this
+        format replace the old .lzma format used by LZMA SDK and
+        LZMA Utils.
+
+
+0.1. Notices and Acknowledgements
+
+        This file format was designed by Lasse Collin
+        <lasse.collin@tukaani.org> and Igor Pavlov.
+
+        Special thanks for helping with this document goes to
+        Ville Koskinen. Thanks for helping with this document goes to
+        Mark Adler, H. Peter Anvin, Mikko Pouru, and Lars Wirzenius.
+
+        This document has been put into the public domain.
+
+
+0.2. Getting the Latest Version
+
+        The latest official version of this document can be downloaded
+        from <http://tukaani.org/xz/xz-file-format.txt>.
+
+        Specific versions of this document have a filename
+        xz-file-format-X.Y.Z.txt where X.Y.Z is the version number.
+        For example, the version 1.0.0 of this document is available
+        at <http://tukaani.org/xz/xz-file-format-1.0.0.txt>.
+
+
+0.3. Version History
+
+        Version   Date          Description
+
+        1.1.0     2022-12-11    Added ARM64 filter and clarified 32-bit
+                                ARM endianness in Section 5.3.2,
+                                language improvements in Section 5.4
+
+        1.0.4     2009-08-27    Language improvements in Sections 1.2,
+                                2.1.1.2, 3.1.1, 3.1.2, and 5.3.1
+
+        1.0.3     2009-06-05    Spelling fixes in Sections 5.1 and 5.4
+
+        1.0.2     2009-06-04    Typo fixes in Sections 4 and 5.3.1
+
+        1.0.1     2009-06-01    Typo fix in Section 0.3 and minor
+                                clarifications to Sections 2, 2.2,
+                                3.3, 4.4, and 5.3.2
+
+        1.0.0     2009-01-14    The first official version
+
+
+1. Conventions
+
+        The key words "MUST", "MUST NOT", "REQUIRED", "SHOULD",
+        "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
+        document are to be interpreted as described in [RFC-2119].
+
+        Indicating a warning means displaying a message, returning
+        appropriate exit status, or doing something else to let the
+        user know that something worth warning occurred. The operation
+        SHOULD still finish if a warning is indicated.
+
+        Indicating an error means displaying a message, returning
+        appropriate exit status, or doing something else to let the
+        user know that something prevented successfully finishing the
+        operation. The operation MUST be aborted once an error has
+        been indicated.
+
+
+1.1. Byte and Its Representation
+
+        In this document, byte is always 8 bits.
+
+        A "null byte" has all bits unset. That is, the value of a null
+        byte is 0x00.
+
+        To represent byte blocks, this document uses notation that
+        is similar to the notation used in [RFC-1952]:
+
+            +-------+
+            |  Foo  |   One byte.
+            +-------+
+
+            +---+---+
+            |  Foo  |   Two bytes; that is, some of the vertical bars
+            +---+---+   can be missing.
+
+            +=======+
+            |  Foo  |   Zero or more bytes.
+            +=======+
+
+        In this document, a boxed byte or a byte sequence declared
+        using this notation is called "a field". The example field
+        above would be called "the Foo field" or plain "Foo".
+
+        If there are many fields, they may be split to multiple lines.
+        This is indicated with an arrow ("--->"):
+
+            +=====+
+            | Foo |
+            +=====+
+
+                 +=====+
+            ---> | Bar |
+                 +=====+
+
+        The above is equivalent to this:
+
+            +=====+=====+
+            | Foo | Bar |
+            +=====+=====+
+
+
+1.2. Multibyte Integers
+
+        Multibyte integers of static length, such as CRC values,
+        are stored in little endian byte order (least significant
+        byte first).
+
+        When smaller values are more likely than bigger values (for
+        example file sizes), multibyte integers are encoded in a
+        variable-length representation:
+          - Numbers in the range [0, 127] are copied as is, and take
+            one byte of space.
+          - Bigger numbers will occupy two or more bytes. All but the
+            last byte of the multibyte representation have the highest
+            (eighth) bit set.
+
+        For now, the value of the variable-length integers is limited
+        to 63 bits, which limits the encoded size of the integer to
+        nine bytes. These limits may be increased in the future if
+        needed.
+
+        The following C code illustrates encoding and decoding of
+        variable-length integers. The functions return the number of
+        bytes occupied by the integer (1-9), or zero on error.
+
+            #include <stddef.h>
+            #include <inttypes.h>
+
+            size_t
+            encode(uint8_t buf[static 9], uint64_t num)
+            {
+                if (num > UINT64_MAX / 2)
+                    return 0;
+
+                size_t i = 0;
+
+                while (num >= 0x80) {
+                    buf[i++] = (uint8_t)(num) | 0x80;
+                    num >>= 7;
+                }
+
+                buf[i++] = (uint8_t)(num);
+
+                return i;
+            }
+
+            size_t
+            decode(const uint8_t buf[], size_t size_max, uint64_t *num)
+            {
+                if (size_max == 0)
+                    return 0;
+
+                if (size_max > 9)
+                    size_max = 9;
+
+                *num = buf[0] & 0x7F;
+                size_t i = 0;
+
+                while (buf[i++] & 0x80) {
+                    if (i >= size_max || buf[i] == 0x00)
+                        return 0;
+
+                    *num |= (uint64_t)(buf[i] & 0x7F) << (i * 7);
+                }
+
+                return i;
+            }
+
+
+2. Overall Structure of .xz File
+
+        A standalone .xz file consists of one or more Streams which may
+        have Stream Padding between or after them:
+
+            +========+================+========+================+
+            | Stream | Stream Padding | Stream | Stream Padding | ...
+            +========+================+========+================+
+
+        The sizes of Stream and Stream Padding are always multiples
+        of four bytes, thus the size of every valid .xz file MUST be
+        a multiple of four bytes.
+
+        While a typical file contains only one Stream and no Stream
+        Padding, a decoder handling standalone .xz files SHOULD support
+        files that have more than one Stream or Stream Padding.
+
+        In contrast to standalone .xz files, when the .xz file format
+        is used as an internal part of some other file format or
+        communication protocol, it usually is expected that the decoder
+        stops after the first Stream, and doesn't look for Stream
+        Padding or possibly other Streams.
+
+
+2.1. Stream
+
+        +-+-+-+-+-+-+-+-+-+-+-+-+=======+=======+     +=======+
+        |     Stream Header     | Block | Block | ... | Block |
+        +-+-+-+-+-+-+-+-+-+-+-+-+=======+=======+     +=======+
+
+             +=======+-+-+-+-+-+-+-+-+-+-+-+-+
+        ---> | Index |     Stream Footer     |
+             +=======+-+-+-+-+-+-+-+-+-+-+-+-+
+
+        All the above fields have a size that is a multiple of four. If
+        Stream is used as an internal part of another file format, it
+        is RECOMMENDED to make the Stream start at an offset that is
+        a multiple of four bytes.
+
+        Stream Header, Index, and Stream Footer are always present in
+        a Stream. The maximum size of the Index field is 16 GiB (2^34).
+
+        There are zero or more Blocks. The maximum number of Blocks is
+        limited only by the maximum size of the Index field.
+
+        Total size of a Stream MUST be less than 8 EiB (2^63 bytes).
+        The same limit applies to the total amount of uncompressed
+        data stored in a Stream.
+
+        If an implementation supports handling .xz files with multiple
+        concatenated Streams, it MAY apply the above limits to the file
+        as a whole instead of applying them on a per-Stream basis.
+
+
+2.1.1. Stream Header
+
+        +---+---+---+---+---+---+-------+------+--+--+--+--+
+        |  Header Magic Bytes   | Stream Flags |   CRC32   |
+        +---+---+---+---+---+---+-------+------+--+--+--+--+
+
+
+2.1.1.1. Header Magic Bytes
+
+        The first six (6) bytes of the Stream are so-called Header
+        Magic Bytes. They can be used to identify the file type.
+
+            Using a C array and ASCII:
+            const uint8_t HEADER_MAGIC[6]
+                    = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
+
+            In plain hexadecimal:
+            FD 37 7A 58 5A 00
+
+        Notes:
+          - The first byte (0xFD) was chosen so that the files cannot
+            be erroneously detected as being in .lzma format, in which
+            the first byte is in the range [0x00, 0xE0].
+          - The sixth byte (0x00) was chosen to prevent applications
+            from misdetecting the file as a text file.
+
+        If the Header Magic Bytes don't match, the decoder MUST
+        indicate an error.
+
+
+2.1.1.2. Stream Flags
+
+        The first byte of Stream Flags is always a null byte. In the
+        future, this byte may be used to indicate a new Stream version
+        or other Stream properties.
+
+        The second byte of Stream Flags is a bit field:
+
+            Bit(s)  Mask  Description
+             0-3    0x0F  Type of Check (see Section 3.4):
+                              ID    Size      Check name
+                              0x00   0 bytes  None
+                              0x01   4 bytes  CRC32
+                              0x02   4 bytes  (Reserved)
+                              0x03   4 bytes  (Reserved)
+                              0x04   8 bytes  CRC64
+                              0x05   8 bytes  (Reserved)
+                              0x06   8 bytes  (Reserved)
+                              0x07  16 bytes  (Reserved)
+                              0x08  16 bytes  (Reserved)
+                              0x09  16 bytes  (Reserved)
+                              0x0A  32 bytes  SHA-256
+                              0x0B  32 bytes  (Reserved)
+                              0x0C  32 bytes  (Reserved)
+                              0x0D  64 bytes  (Reserved)
+                              0x0E  64 bytes  (Reserved)
+                              0x0F  64 bytes  (Reserved)
+             4-7    0xF0  Reserved for future use; MUST be zero for now.
+
+        Implementations SHOULD support at least the Check IDs 0x00
+        (None) and 0x01 (CRC32). Supporting other Check IDs is
+        OPTIONAL. If an unsupported Check is used, the decoder SHOULD
+        indicate a warning or error.
+
+        If any reserved bit is set, the decoder MUST indicate an error.
+        It is possible that there is a new field present which the
+        decoder is not aware of, and can thus parse the Stream Header
+        incorrectly.
+
+
+2.1.1.3. CRC32
+
+        The CRC32 is calculated from the Stream Flags field. It is
+        stored as an unsigned 32-bit little endian integer. If the
+        calculated value does not match the stored one, the decoder
+        MUST indicate an error.
+
+        The idea is that Stream Flags would always be two bytes, even
+        if new features are needed. This way old decoders will be able
+        to verify the CRC32 calculated from Stream Flags, and thus
+        distinguish between corrupt files (CRC32 doesn't match) and
+        files that the decoder doesn't support (CRC32 matches but
+        Stream Flags has reserved bits set).
+
+
+2.1.2. Stream Footer
+
+        +-+-+-+-+---+---+---+---+-------+------+----------+---------+
+        | CRC32 | Backward Size | Stream Flags | Footer Magic Bytes |
+        +-+-+-+-+---+---+---+---+-------+------+----------+---------+
+
+
+2.1.2.1. CRC32
+
+        The CRC32 is calculated from the Backward Size and Stream Flags
+        fields. It is stored as an unsigned 32-bit little endian
+        integer. If the calculated value does not match the stored one,
+        the decoder MUST indicate an error.
+
+        The reason to have the CRC32 field before the Backward Size and
+        Stream Flags fields is to keep the four-byte fields aligned to
+        a multiple of four bytes.
+
+
+2.1.2.2. Backward Size
+
+        Backward Size is stored as a 32-bit little endian integer,
+        which indicates the size of the Index field as a multiple of
+        four bytes, minimum value being four bytes:
+
+            real_backward_size = (stored_backward_size + 1) * 4;
+
+        If the stored value does not match the real size of the Index
+        field, the decoder MUST indicate an error.
+
+        Using a fixed-size integer to store Backward Size makes
+        it slightly simpler to parse the Stream Footer when the
+        application needs to parse the Stream backwards.
+
+
+2.1.2.3. Stream Flags
+
+        This is a copy of the Stream Flags field from the Stream
+        Header. The information stored to Stream Flags is needed
+        when parsing the Stream backwards. The decoder MUST compare
+        the Stream Flags fields in both Stream Header and Stream
+        Footer, and indicate an error if they are not identical.
+
+
+2.1.2.4. Footer Magic Bytes
+
+        As the last step of the decoding process, the decoder MUST
+        verify the existence of Footer Magic Bytes. If they don't
+        match, an error MUST be indicated.
+
+            Using a C array and ASCII:
+            const uint8_t FOOTER_MAGIC[2] = { 'Y', 'Z' };
+
+            In hexadecimal:
+            59 5A
+
+        The primary reason to have Footer Magic Bytes is to make
+        it easier to detect incomplete files quickly, without
+        uncompressing. If the file does not end with Footer Magic Bytes
+        (excluding Stream Padding described in Section 2.2), it cannot
+        be undamaged, unless someone has intentionally appended garbage
+        after the end of the Stream.
+
+
+2.2. Stream Padding
+
+        Only the decoders that support decoding of concatenated Streams
+        MUST support Stream Padding.
+
+        Stream Padding MUST contain only null bytes. To preserve the
+        four-byte alignment of consecutive Streams, the size of Stream
+        Padding MUST be a multiple of four bytes. Empty Stream Padding
+        is allowed. If these requirements are not met, the decoder MUST
+        indicate an error.
+
+        Note that non-empty Stream Padding is allowed at the end of the
+        file; there doesn't need to be a new Stream after non-empty
+        Stream Padding. This can be convenient in certain situations
+        [GNU-tar].
+
+        The possibility of Stream Padding MUST be taken into account
+        when designing an application that parses Streams backwards
+        and supports concatenated Streams.
+
+
+3. Block
+
+        +==============+=================+===============+=======+
+        | Block Header | Compressed Data | Block Padding | Check |
+        +==============+=================+===============+=======+
+
+
+3.1. Block Header
+
+        +-------------------+-------------+=================+
+        | Block Header Size | Block Flags | Compressed Size |
+        +-------------------+-------------+=================+
+
+             +===================+======================+
+        ---> | Uncompressed Size | List of Filter Flags |
+             +===================+======================+
+
+             +================+--+--+--+--+
+        ---> | Header Padding |   CRC32   |
+             +================+--+--+--+--+
+
+
+3.1.1. Block Header Size
+
+        This field overlaps with the Index Indicator field (see
+        Section 4.1).
+
+        This field contains the size of the Block Header field,
+        including the Block Header Size field itself. Valid values are
+        in the range [0x01, 0xFF], which indicate the size of the Block
+        Header as multiples of four bytes, minimum size being eight
+        bytes:
+
+            real_header_size = (encoded_header_size + 1) * 4;
+
+        If a Block Header bigger than 1024 bytes is needed in the
+        future, a new field can be added between the Block Header and
+        Compressed Data fields. The presence of this new field would
+        be indicated in the Block Header field.
+
+
+3.1.2. Block Flags
+
+        The Block Flags field is a bit field:
+
+            Bit(s)  Mask  Description
+             0-1    0x03  Number of filters (1-4)
+             2-5    0x3C  Reserved for future use; MUST be zero for now.
+              6     0x40  The Compressed Size field is present.
+              7     0x80  The Uncompressed Size field is present.
+
+        If any reserved bit is set, the decoder MUST indicate an error.
+        It is possible that there is a new field present which the
+        decoder is not aware of, and can thus parse the Block Header
+        incorrectly.
+
+
+3.1.3. Compressed Size
+
+        This field is present only if the appropriate bit is set in
+        the Block Flags field (see Section 3.1.2).
+
+        The Compressed Size field contains the size of the Compressed
+        Data field, which MUST be non-zero. Compressed Size is stored
+        using the encoding described in Section 1.2. If the Compressed
+        Size doesn't match the size of the Compressed Data field, the
+        decoder MUST indicate an error.
+
+
+3.1.4. Uncompressed Size
+
+        This field is present only if the appropriate bit is set in
+        the Block Flags field (see Section 3.1.2).
+
+        The Uncompressed Size field contains the size of the Block
+        after uncompressing. Uncompressed Size is stored using the
+        encoding described in Section 1.2. If the Uncompressed Size
+        does not match the real uncompressed size, the decoder MUST
+        indicate an error.
+
+        Storing the Compressed Size and Uncompressed Size fields serves
+        several purposes:
+          - The decoder knows how much memory it needs to allocate
+            for a temporary buffer in multithreaded mode.
+          - Simple error detection: wrong size indicates a broken file.
+          - Seeking forwards to a specific location in streamed mode.
+
+        It should be noted that the only reliable way to determine
+        the real uncompressed size is to uncompress the Block,
+        because the Block Header and Index fields may contain
+        (intentionally or unintentionally) invalid information.
+
+
+3.1.5. List of Filter Flags
+
+        +================+================+     +================+
+        | Filter 0 Flags | Filter 1 Flags | ... | Filter n Flags |
+        +================+================+     +================+
+
+        The number of Filter Flags fields is stored in the Block Flags
+        field (see Section 3.1.2).
+
+        The format of each Filter Flags field is as follows:
+
+            +===========+====================+===================+
+            | Filter ID | Size of Properties | Filter Properties |
+            +===========+====================+===================+
+
+        Both Filter ID and Size of Properties are stored using the
+        encoding described in Section 1.2. Size of Properties indicates
+        the size of the Filter Properties field as bytes. The list of
+        officially defined Filter IDs and the formats of their Filter
+        Properties are described in Section 5.3.
+
+        Filter IDs greater than or equal to 0x4000_0000_0000_0000
+        (2^62) are reserved for implementation-specific internal use.
+        These Filter IDs MUST never be used in List of Filter Flags.
+
+
+3.1.6. Header Padding
+
+        This field contains as many null bytes as are needed to make
+        the Block Header have the size specified in Block Header Size.
+        If any of the bytes are not null bytes, the decoder MUST
+        indicate an error. It is possible that there is a new field
+        present which the decoder is not aware of, and can thus parse
+        the Block Header incorrectly.
+
+
+3.1.7. CRC32
+
+        The CRC32 is calculated over everything in the Block Header
+        field except the CRC32 field itself. It is stored as an
+        unsigned 32-bit little endian integer. If the calculated
+        value does not match the stored one, the decoder MUST indicate
+        an error.
+
+        Verifying the CRC32 of the Block Header before parsing the
+        actual contents allows the decoder to distinguish between
+        corrupt and unsupported files.
+
+
+3.2. Compressed Data
+
+        The format of Compressed Data depends on Block Flags and List
+        of Filter Flags. Excluding the descriptions of the simplest
+        filters in Section 5.3, the format of the filter-specific
+        encoded data is out of scope of this document.
+
+
+3.3. Block Padding
+
+        Block Padding MUST contain 0-3 null bytes to make the size of
+        the Block a multiple of four bytes. This can be needed when
+        the size of Compressed Data is not a multiple of four. If any
+        of the bytes in Block Padding are not null bytes, the decoder
+        MUST indicate an error.
+
+
+3.4. Check
+
+        The type and size of the Check field depends on which bits
+        are set in the Stream Flags field (see Section 2.1.1.2).
+
+        The Check, when used, is calculated from the original
+        uncompressed data. If the calculated Check does not match the
+        stored one, the decoder MUST indicate an error. If the selected
+        type of Check is not supported by the decoder, it SHOULD
+        indicate a warning or error.
+
+
+4. Index
+
+        +-----------------+===================+
+        | Index Indicator | Number of Records |
+        +-----------------+===================+
+
+             +=================+===============+-+-+-+-+
+        ---> | List of Records | Index Padding | CRC32 |
+             +=================+===============+-+-+-+-+
+
+        Index serves several purposes. Using it, one can
+          - verify that all Blocks in a Stream have been processed;
+          - find out the uncompressed size of a Stream; and
+          - quickly access the beginning of any Block (random access).
+
+
+4.1. Index Indicator
+
+        This field overlaps with the Block Header Size field (see
+        Section 3.1.1). The value of Index Indicator is always 0x00.
+
+
+4.2. Number of Records
+
+        This field indicates how many Records there are in the List
+        of Records field, and thus how many Blocks there are in the
+        Stream. The value is stored using the encoding described in
+        Section 1.2. If the decoder has decoded all the Blocks of the
+        Stream, and then notices that the Number of Records doesn't
+        match the real number of Blocks, the decoder MUST indicate an
+        error.
+
+
+4.3. List of Records
+
+        List of Records consists of as many Records as indicated by the
+        Number of Records field:
+
+            +========+========+
+            | Record | Record | ...
+            +========+========+
+
+        Each Record contains information about one Block:
+
+            +===============+===================+
+            | Unpadded Size | Uncompressed Size |
+            +===============+===================+
+
+        If the decoder has decoded all the Blocks of the Stream, it
+        MUST verify that the contents of the Records match the real
+        Unpadded Size and Uncompressed Size of the respective Blocks.
+
+        Implementation hint: It is possible to verify the Index with
+        constant memory usage by calculating for example SHA-256 of
+        both the real size values and the List of Records, then
+        comparing the hash values. Implementing this using
+        non-cryptographic hash like CRC32 SHOULD be avoided unless
+        small code size is important.
+
+        If the decoder supports random-access reading, it MUST verify
+        that Unpadded Size and Uncompressed Size of every completely
+        decoded Block match the sizes stored in the Index. If only
+        a partial Block is decoded, the decoder MUST verify that the
+        processed sizes don't exceed the sizes stored in the Index.
+
+
+4.3.1. Unpadded Size
+
+        This field indicates the size of the Block excluding the Block
+        Padding field. That is, Unpadded Size is the size of the Block
+        Header, Compressed Data, and Check fields. Unpadded Size is
+        stored using the encoding described in Section 1.2. The value
+        MUST never be zero; with the current structure of Blocks, the
+        actual minimum value for Unpadded Size is five.
+
+        Implementation note: Because the size of the Block Padding
+        field is not included in Unpadded Size, calculating the total
+        size of a Stream or doing random-access reading requires
+        calculating the actual size of the Blocks by rounding Unpadded
+        Sizes up to the next multiple of four.
+
+        The reason to exclude Block Padding from Unpadded Size is to
+        ease making a raw copy of Compressed Data without Block
+        Padding. This can be useful, for example, if someone wants
+        to convert Streams to some other file format quickly.
+
+
+4.3.2. Uncompressed Size
+
+        This field indicates the Uncompressed Size of the respective
+        Block as bytes. The value is stored using the encoding
+        described in Section 1.2.
+
+
+4.4. Index Padding
+
+        This field MUST contain 0-3 null bytes to pad the Index to
+        a multiple of four bytes. If any of the bytes are not null
+        bytes, the decoder MUST indicate an error.
+
+
+4.5. CRC32
+
+        The CRC32 is calculated over everything in the Index field
+        except the CRC32 field itself. The CRC32 is stored as an
+        unsigned 32-bit little endian integer. If the calculated
+        value does not match the stored one, the decoder MUST indicate
+        an error.
+
+
+5. Filter Chains
+
+        The Block Flags field defines how many filters are used. When
+        more than one filter is used, the filters are chained; that is,
+        the output of one filter is the input of another filter. The
+        following figure illustrates the direction of data flow.
+
+                    v   Uncompressed Data   ^
+                    |       Filter 0        |
+            Encoder |       Filter 1        | Decoder
+                    |       Filter n        |
+                    v    Compressed Data    ^
+
+
+5.1. Alignment
+
+        Alignment of uncompressed input data is usually the job of
+        the application producing the data. For example, to get the
+        best results, an archiver tool should make sure that all
+        PowerPC executable files in the archive stream start at
+        offsets that are multiples of four bytes.
+
+        Some filters, for example LZMA2, can be configured to take
+        advantage of specified alignment of input data. Note that
+        taking advantage of aligned input can be beneficial also when
+        a filter is not the first filter in the chain. For example,
+        if you compress PowerPC executables, you may want to use the
+        PowerPC filter and chain that with the LZMA2 filter. Because
+        not only the input but also the output alignment of the PowerPC
+        filter is four bytes, it is now beneficial to set LZMA2
+        settings so that the LZMA2 encoder can take advantage of its
+        four-byte-aligned input data.
+
+        The output of the last filter in the chain is stored to the
+        Compressed Data field, which is guaranteed to be aligned
+        to a multiple of four bytes relative to the beginning of the
+        Stream. This can increase
+          - speed, if the filtered data is handled multiple bytes at
+            a time by the filter-specific encoder and decoder,
+            because accessing aligned data in computer memory is
+            usually faster; and
+          - compression ratio, if the output data is later compressed
+            with an external compression tool.
+
+
+5.2. Security
+
+        If filters were allowed to be chained freely, it would be
+        possible to create malicious files that would be very slow to
+        decode. Such files could be used to create denial of service
+        attacks.
+
+        Slow files could occur when multiple filters are chained:
+
+            v   Compressed input data
+            |   Filter 1 decoder (last filter)
+            |   Filter 0 decoder (non-last filter)
+            v   Uncompressed output data
+
+        The decoder of the last filter in the chain produces a lot of
+        output from little input. Another filter in the chain takes the
+        output of the last filter, and produces very little output
+        while consuming a lot of input. As a result, a lot of data is
+        moved inside the filter chain, but the filter chain as a whole
+        gets very little work done.
+
+        To prevent such slow files, there are restrictions on
+        how the filters can be chained. These restrictions MUST be
+        taken into account when designing new filters.
+
+        The maximum number of filters in the chain has been limited to
+        four, thus there can be a maximum of three non-last filters.
+        Of these three non-last filters, only two are allowed to change
+        the size of the data.
+
+        The non-last filters that change the size of the data MUST
+        have a limit on how much the decoder can compress the data:
+        the decoder SHOULD produce at least n bytes of output when
+        the filter is given 2n bytes of input. This limit is not
+        absolute, but significant deviations MUST be avoided.
+
+        The above limitations guarantee that if the last filter in the
+        chain produces 4n bytes of output, the chain as a whole will
+        produce at least n bytes of output.
+
+
+5.3. Filters
+
+5.3.1. LZMA2
+
+        LZMA (Lempel-Ziv-Markov chain-Algorithm) is a general-purpose
+        compression algorithm with high compression ratio and fast
+        decompression. LZMA is based on LZ77 and range coding
+        algorithms.
+
+        LZMA2 is an extension on top of the original LZMA. LZMA2 uses
+        LZMA internally, but adds support for flushing the encoder,
+        uncompressed chunks, eases stateful decoder implementations,
+        and improves support for multithreading. Thus, the plain LZMA
+        will not be supported in this file format.
+
+            Filter ID:                  0x21
+            Size of Filter Properties:  1 byte
+            Changes size of data:       Yes
+            Allow as a non-last filter: No
+            Allow as the last filter:   Yes
+
+            Preferred alignment:
+                Input data:             Adjustable to 1/2/4/8/16 byte(s)
+                Output data:            1 byte
+
+        The format of the one-byte Filter Properties field is as
+        follows:
+
+            Bits   Mask   Description
+            0-5    0x3F   Dictionary Size
+            6-7    0xC0   Reserved for future use; MUST be zero for now.
+
+        Dictionary Size is encoded with one-bit mantissa and five-bit
+        exponent. The smallest dictionary size is 4 KiB and the biggest
+        is 4 GiB - 1 B.
+
+            Raw value   Mantissa   Exponent   Dictionary size
+                0           2         11         4 KiB
+                1           3         11         6 KiB
+                2           2         12         8 KiB
+                3           3         12        12 KiB
+                4           2         13        16 KiB
+                5           3         13        24 KiB
+                6           2         14        32 KiB
+              ...         ...        ...      ...
+               35           3         28       768 MiB
+               36           2         29      1024 MiB
+               37           3         29      1536 MiB
+               38           2         30      2048 MiB
+               39           3         30      3072 MiB
+               40           2         31      4096 MiB - 1 B
+
+        Instead of having a table in the decoder, the dictionary size
+        can be decoded using the following C code:
+
+            const uint8_t bits = get_dictionary_flags() & 0x3F;
+            if (bits > 40)
+                return DICTIONARY_TOO_BIG; // Bigger than 4 GiB
+
+            uint32_t dictionary_size;
+            if (bits == 40) {
+                dictionary_size = UINT32_MAX;
+            } else {
+                dictionary_size = 2 | (bits & 1);
+                dictionary_size <<= bits / 2 + 11;
+            }
+
+
+5.3.2. Branch/Call/Jump Filters for Executables
+
+        These filters convert relative branch, call, and jump
+        instructions to their absolute counterparts in executable
+        files. This conversion increases redundancy and thus
+        compression ratio.
+
+            Size of Filter Properties:  0 or 4 bytes
+            Changes size of data:       No
+            Allow as a non-last filter: Yes
+            Allow as the last filter:   No
+
+        Below is the list of filters in this category. The alignment
+        is the same for both input and output data.
+
+            Filter ID   Alignment   Description
+              0x04       1 byte     x86 filter (BCJ)
+              0x05       4 bytes    PowerPC (big endian) filter
+              0x06      16 bytes    IA64 filter
+              0x07       4 bytes    ARM filter [1]
+              0x08       2 bytes    ARM Thumb filter [1]
+              0x09       4 bytes    SPARC filter
+              0x0A       4 bytes    ARM64 filter [2]
+
+              [1] These are for little endian instruction encoding.
+                  This must not be confused with data endianness.
+                  A processor configured for big endian data access
+                  may still use little endian instruction encoding.
+                  The filters don't care about the data endianness.
+
+              [2] 4096-byte alignment gives the best results
+                  because the address in the ADRP instruction
+                  is a multiple of 4096 bytes.
+
+        If the size of Filter Properties is four bytes, the Filter
+        Properties field contains the start offset used for address
+        conversions. It is stored as an unsigned 32-bit little endian
+        integer. The start offset MUST be a multiple of the alignment
+        of the filter as listed in the table above; if it isn't, the
+        decoder MUST indicate an error. If the size of Filter
+        Properties is zero, the start offset is zero.
+
+        Setting the start offset may be useful if an executable has
+        multiple sections, and there are many cross-section calls.
+        Taking advantage of this feature usually requires usage of
+        the Subblock filter, whose design is not complete yet.
+
+
+5.3.3. Delta
+
+        The Delta filter may increase compression ratio when the value
+        of the next byte correlates with the value of an earlier byte
+        at a specified distance.
+
+            Filter ID:                  0x03
+            Size of Filter Properties:  1 byte
+            Changes size of data:       No
+            Allow as a non-last filter: Yes
+            Allow as the last filter:   No
+
+            Preferred alignment:
+                Input data:             1 byte
+                Output data:            Same as the original input data
+
+        The Properties byte indicates the delta distance, which can be
+        1-256 bytes backwards from the current byte: 0x00 indicates
+        distance of 1 byte and 0xFF distance of 256 bytes.
+
+
+5.3.3.1. Format of the Encoded Output
+
+        The code below illustrates both encoding and decoding with
+        the Delta filter.
+
+            // Distance is in the range [1, 256].
+            const unsigned int distance = get_properties_byte() + 1;
+            uint8_t pos = 0;
+            uint8_t delta[256];
+
+            memset(delta, 0, sizeof(delta));
+
+            while (1) {
+                const int byte = read_byte();
+                if (byte == EOF)
+                    break;
+
+                uint8_t tmp = delta[(uint8_t)(distance + pos)];
+                if (is_encoder) {
+                    tmp = (uint8_t)(byte) - tmp;
+                    delta[pos] = (uint8_t)(byte);
+                } else {
+                    tmp = (uint8_t)(byte) + tmp;
+                    delta[pos] = tmp;
+                }
+
+                write_byte(tmp);
+                --pos;
+            }
+
+
+5.4. Custom Filter IDs
+
+        If a developer wants to use custom Filter IDs, there are two
+        choices. The first choice is to contact Lasse Collin and ask
+        him to allocate a range of IDs for the developer.
+
+        The second choice is to generate a 40-bit random integer
+        which the developer can use as a personal Developer ID.
+        To minimize the risk of collisions, the Developer ID has to
+        be a randomly generated integer, not a manually selected
+        "hex word". The following command, which works on many free
+        operating systems, can be used to generate a Developer ID:
+
+            dd if=/dev/urandom bs=5 count=1 | hexdump
+
+        The developer can then use the Developer ID to create unique
+        (well, hopefully unique) Filter IDs.
+
+            Bits    Mask                    Description
+             0-15   0x0000_0000_0000_FFFF   Filter ID
+            16-55   0x00FF_FFFF_FFFF_0000   Developer ID
+            56-62   0x3F00_0000_0000_0000   Static prefix: 0x3F
+
+        The resulting 63-bit integer will use 9 bytes of space when
+        stored using the encoding described in Section 1.2. To get
+        a shorter ID, see the beginning of this Section for how to
+        request a custom ID range.
+
+
+5.4.1. Reserved Custom Filter ID Ranges
+
+        Range                       Description
+        0x0000_0300 - 0x0000_04FF   Reserved to ease .7z compatibility
+        0x0002_0000 - 0x0007_FFFF   Reserved to ease .7z compatibility
+        0x0200_0000 - 0x07FF_FFFF   Reserved to ease .7z compatibility
+
+
+6. Cyclic Redundancy Checks
+
+        There are several incompatible variations to calculate CRC32
+        and CRC64. For simplicity and clarity, complete examples are
+        provided to calculate the checks as they are used in this file
+        format. Implementations MAY use different code as long as it
+        gives identical results.
+
+        The program below reads data from standard input, calculates
+        the CRC32 and CRC64 values, and prints the calculated values
+        as big endian hexadecimal strings to standard output.
+
+            #include <stddef.h>
+            #include <inttypes.h>
+            #include <stdio.h>
+
+            uint32_t crc32_table[256];
+            uint64_t crc64_table[256];
+
+            void
+            init(void)
+            {
+                static const uint32_t poly32 = UINT32_C(0xEDB88320);
+                static const uint64_t poly64
+                        = UINT64_C(0xC96C5795D7870F42);
+
+                for (size_t i = 0; i < 256; ++i) {
+                    uint32_t crc32 = i;
+                    uint64_t crc64 = i;
+
+                    for (size_t j = 0; j < 8; ++j) {
+                        if (crc32 & 1)
+                            crc32 = (crc32 >> 1) ^ poly32;
+                        else
+                            crc32 >>= 1;
+
+                        if (crc64 & 1)
+                            crc64 = (crc64 >> 1) ^ poly64;
+                        else
+                            crc64 >>= 1;
+                    }
+
+                    crc32_table[i] = crc32;
+                    crc64_table[i] = crc64;
+                }
+            }
+
+            uint32_t
+            crc32(const uint8_t *buf, size_t size, uint32_t crc)
+            {
+                crc = ~crc;
+                for (size_t i = 0; i < size; ++i)
+                    crc = crc32_table[buf[i] ^ (crc & 0xFF)]
+                            ^ (crc >> 8);
+                return ~crc;
+            }
+
+            uint64_t
+            crc64(const uint8_t *buf, size_t size, uint64_t crc)
+            {
+                crc = ~crc;
+                for (size_t i = 0; i < size; ++i)
+                    crc = crc64_table[buf[i] ^ (crc & 0xFF)]
+                            ^ (crc >> 8);
+                return ~crc;
+            }
+
+            int
+            main()
+            {
+                init();
+
+                uint32_t value32 = 0;
+                uint64_t value64 = 0;
+                uint64_t total_size = 0;
+                uint8_t buf[8192];
+
+                while (1) {
+                    const size_t buf_size
+                            = fread(buf, 1, sizeof(buf), stdin);
+                    if (buf_size == 0)
+                        break;
+
+                    total_size += buf_size;
+                    value32 = crc32(buf, buf_size, value32);
+                    value64 = crc64(buf, buf_size, value64);
+                }
+
+                printf("Bytes:  %" PRIu64 "\n", total_size);
+                printf("CRC-32: 0x%08" PRIX32 "\n", value32);
+                printf("CRC-64: 0x%016" PRIX64 "\n", value64);
+
+                return 0;
+            }
+
+
+7. References
+
+        LZMA SDK - The original LZMA implementation
+        http://7-zip.org/sdk.html
+
+        LZMA Utils - LZMA adapted to POSIX-like systems
+        http://tukaani.org/lzma/
+
+        XZ Utils - The next generation of LZMA Utils
+        http://tukaani.org/xz/
+
+        [RFC-1952]
+        GZIP file format specification version 4.3
+        http://www.ietf.org/rfc/rfc1952.txt
+          - Notation of byte boxes in section "2.1. Overall conventions"
+
+        [RFC-2119]
+        Key words for use in RFCs to Indicate Requirement Levels
+        http://www.ietf.org/rfc/rfc2119.txt
+
+        [GNU-tar]
+        GNU tar 1.21 manual
+        http://www.gnu.org/software/tar/manual/html_node/Blocking-Factor.html
+          - Node 9.4.2 "Blocking Factor", paragraph that begins
+            "gzip will complain about trailing garbage"
+          - Note that this URL points to the latest version of the
+            manual, and may some day not contain the note which is in
+            1.21. For the exact version of the manual, download GNU
+            tar 1.21: ftp://ftp.gnu.org/pub/gnu/tar/tar-1.21.tar.gz
+
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 21:12:04 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 21:12:04 +0000
commit	eac54b7c4aec25060d7bd856f7cdc290943d6aae (patch)
tree	9a6d81c9f88df4698e746d63d14ddafeddd918b8 /doc
parent	Initial commit. (diff)
download	xz-utils-eac54b7c4aec25060d7bd856f7cdc290943d6aae.tar.xz xz-utils-eac54b7c4aec25060d7bd856f7cdc290943d6aae.zip