diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 21:12:04 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 21:12:04 +0000 |
commit | eac54b7c4aec25060d7bd856f7cdc290943d6aae (patch) | |
tree | 9a6d81c9f88df4698e746d63d14ddafeddd918b8 /src/xz | |
parent | Initial commit. (diff) | |
download | xz-utils-upstream.tar.xz xz-utils-upstream.zip |
Adding upstream version 5.4.1.upstream/5.4.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/xz')
-rw-r--r-- | src/xz/Makefile.am | 133 | ||||
-rw-r--r-- | src/xz/Makefile.in | 1162 | ||||
-rw-r--r-- | src/xz/args.c | 751 | ||||
-rw-r--r-- | src/xz/args.h | 44 | ||||
-rw-r--r-- | src/xz/coder.c | 1108 | ||||
-rw-r--r-- | src/xz/coder.h | 78 | ||||
-rw-r--r-- | src/xz/file_io.c | 1366 | ||||
-rw-r--r-- | src/xz/file_io.h | 179 | ||||
-rw-r--r-- | src/xz/hardware.c | 338 | ||||
-rw-r--r-- | src/xz/hardware.h | 74 | ||||
-rw-r--r-- | src/xz/list.c | 1317 | ||||
-rw-r--r-- | src/xz/list.h | 18 | ||||
-rw-r--r-- | src/xz/main.c | 344 | ||||
-rw-r--r-- | src/xz/main.h | 30 | ||||
-rw-r--r-- | src/xz/message.c | 1146 | ||||
-rw-r--r-- | src/xz/message.h | 152 | ||||
-rw-r--r-- | src/xz/mytime.c | 86 | ||||
-rw-r--r-- | src/xz/mytime.h | 43 | ||||
-rw-r--r-- | src/xz/options.c | 358 | ||||
-rw-r--r-- | src/xz/options.h | 31 | ||||
-rw-r--r-- | src/xz/private.h | 66 | ||||
-rw-r--r-- | src/xz/signals.c | 209 | ||||
-rw-r--r-- | src/xz/signals.h | 43 | ||||
-rw-r--r-- | src/xz/suffix.c | 411 | ||||
-rw-r--r-- | src/xz/suffix.h | 28 | ||||
-rw-r--r-- | src/xz/util.c | 286 | ||||
-rw-r--r-- | src/xz/util.h | 119 | ||||
-rw-r--r-- | src/xz/xz.1 | 3020 | ||||
-rw-r--r-- | src/xz/xz_w32res.rc | 12 |
29 files changed, 12952 insertions, 0 deletions
diff --git a/src/xz/Makefile.am b/src/xz/Makefile.am new file mode 100644 index 0000000..4bc64f3 --- /dev/null +++ b/src/xz/Makefile.am @@ -0,0 +1,133 @@ +## +## Author: Lasse Collin +## +## This file has been put into the public domain. +## You can do whatever you want with this file. +## + +bin_PROGRAMS = xz + +xz_SOURCES = \ + args.c \ + args.h \ + coder.c \ + coder.h \ + file_io.c \ + file_io.h \ + hardware.c \ + hardware.h \ + main.c \ + main.h \ + message.c \ + message.h \ + mytime.c \ + mytime.h \ + options.c \ + options.h \ + private.h \ + signals.c \ + signals.h \ + suffix.c \ + suffix.h \ + util.c \ + util.h \ + ../common/tuklib_open_stdxxx.c \ + ../common/tuklib_progname.c \ + ../common/tuklib_exit.c \ + ../common/tuklib_mbstr_width.c \ + ../common/tuklib_mbstr_fw.c + +if COND_MAIN_DECODER +xz_SOURCES += \ + list.c \ + list.h +endif + +if COND_W32 +xz_SOURCES += xz_w32res.rc +endif + +xz_CPPFLAGS = \ + -DLOCALEDIR=\"$(localedir)\" \ + -I$(top_srcdir)/src/common \ + -I$(top_srcdir)/src/liblzma/api \ + -I$(top_builddir)/lib + +xz_LDADD = $(top_builddir)/src/liblzma/liblzma.la $(CAPSICUM_LIB) + +if COND_GNULIB +xz_LDADD += $(top_builddir)/lib/libgnu.a +endif + +# libgnu.a may need these libs, so this must be after libgnu.a. +xz_LDADD += $(LTLIBINTL) + + +# Windows resource compiler support +.rc.o: + $(RC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(xz_CPPFLAGS) $(CPPFLAGS) $(RCFLAGS) -i $< -o $@ + + +dist_man_MANS = xz.1 + + +## Create symlinks for unxz and xzcat for convenience. Create symlinks also +## for lzma, unlzma, and lzcat for compatibility with LZMA Utils 4.32.x. +xzlinks = unxz xzcat + +if COND_LZMALINKS +xzlinks += lzma unlzma lzcat +endif + +install-exec-hook: + cd "$(DESTDIR)$(bindir)" && \ + target=`echo xz | sed '$(transform)'`$(EXEEXT) && \ + for name in $(xzlinks); do \ + link=`echo $$name | sed '$(transform)'`$(LN_EXEEXT) && \ + rm -f "$$link" && \ + $(LN_S) "$$target" "$$link"; \ + done + +# The installation of translated man pages abuses Automake internals +# by calling "install-man" with redefined dist_man_MANS and man_MANS. +# If this breaks some day, don't blame Automake developers. +install-data-hook: + languages= ; \ + if test "$(USE_NLS)" = yes && test -d "$(top_srcdir)/po4a/man"; then \ + languages=`ls "$(top_srcdir)/po4a/man"`; \ + fi; \ + target=`echo xz | sed '$(transform)'` && \ + for lang in . $$languages; do \ + man="$(top_srcdir)/po4a/man/$$lang/xz.1" ; \ + if test -f "$$man"; then \ + $(MAKE) dist_man_MANS="$$man" man_MANS= \ + mandir="$(mandir)/$$lang" install-man; \ + fi; \ + man1dir="$(DESTDIR)$(mandir)/$$lang/man1" && \ + if test -f "$$man1dir/$$target.1"; then ( \ + cd "$$man1dir" && \ + for name in $(xzlinks); do \ + link=`echo $$name | sed '$(transform)'` && \ + rm -f "$$link.1" && \ + $(LN_S) "$$target.1" "$$link.1"; \ + done \ + ); fi; \ + done + +uninstall-hook: + cd "$(DESTDIR)$(bindir)" && \ + for name in $(xzlinks); do \ + link=`echo $$name | sed '$(transform)'`$(LN_EXEEXT) && \ + rm -f "$$link"; \ + done + languages= ; \ + if test "$(USE_NLS)" = yes && test -d "$(top_srcdir)/po4a/man"; then \ + languages=`ls "$(top_srcdir)/po4a/man"`; \ + fi; \ + for lang in . $$languages; do \ + for name in xz $(xzlinks); do \ + name=`echo $$name | sed '$(transform)'` && \ + rm -f "$(DESTDIR)$(mandir)/$$lang/man1/$$name.1"; \ + done; \ + done diff --git a/src/xz/Makefile.in b/src/xz/Makefile.in new file mode 100644 index 0000000..e6422c5 --- /dev/null +++ b/src/xz/Makefile.in @@ -0,0 +1,1162 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +bin_PROGRAMS = xz$(EXEEXT) +@COND_MAIN_DECODER_TRUE@am__append_1 = \ +@COND_MAIN_DECODER_TRUE@ list.c \ +@COND_MAIN_DECODER_TRUE@ list.h + +@COND_W32_TRUE@am__append_2 = xz_w32res.rc +@COND_GNULIB_TRUE@am__append_3 = $(top_builddir)/lib/libgnu.a +@COND_LZMALINKS_TRUE@am__append_4 = lzma unlzma lzcat +subdir = src/xz +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_capsicum.m4 \ + $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/getopt.m4 \ + $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/host-cpu-c-abi.m4 \ + $(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/intlmacosx.m4 \ + $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \ + $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \ + $(top_srcdir)/m4/posix-shell.m4 $(top_srcdir)/m4/progtest.m4 \ + $(top_srcdir)/m4/tuklib_common.m4 \ + $(top_srcdir)/m4/tuklib_cpucores.m4 \ + $(top_srcdir)/m4/tuklib_integer.m4 \ + $(top_srcdir)/m4/tuklib_mbstr.m4 \ + $(top_srcdir)/m4/tuklib_physmem.m4 \ + $(top_srcdir)/m4/tuklib_progname.m4 \ + $(top_srcdir)/m4/visibility.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" +PROGRAMS = $(bin_PROGRAMS) +am__xz_SOURCES_DIST = args.c args.h coder.c coder.h file_io.c \ + file_io.h hardware.c hardware.h main.c main.h message.c \ + message.h mytime.c mytime.h options.c options.h private.h \ + signals.c signals.h suffix.c suffix.h util.c util.h \ + ../common/tuklib_open_stdxxx.c ../common/tuklib_progname.c \ + ../common/tuklib_exit.c ../common/tuklib_mbstr_width.c \ + ../common/tuklib_mbstr_fw.c list.c list.h xz_w32res.rc +@COND_MAIN_DECODER_TRUE@am__objects_1 = xz-list.$(OBJEXT) +@COND_W32_TRUE@am__objects_2 = xz_w32res.$(OBJEXT) +am_xz_OBJECTS = xz-args.$(OBJEXT) xz-coder.$(OBJEXT) \ + xz-file_io.$(OBJEXT) xz-hardware.$(OBJEXT) xz-main.$(OBJEXT) \ + xz-message.$(OBJEXT) xz-mytime.$(OBJEXT) xz-options.$(OBJEXT) \ + xz-signals.$(OBJEXT) xz-suffix.$(OBJEXT) xz-util.$(OBJEXT) \ + xz-tuklib_open_stdxxx.$(OBJEXT) xz-tuklib_progname.$(OBJEXT) \ + xz-tuklib_exit.$(OBJEXT) xz-tuklib_mbstr_width.$(OBJEXT) \ + xz-tuklib_mbstr_fw.$(OBJEXT) $(am__objects_1) $(am__objects_2) +xz_OBJECTS = $(am_xz_OBJECTS) +am__DEPENDENCIES_1 = +xz_DEPENDENCIES = $(top_builddir)/src/liblzma/liblzma.la \ + $(am__DEPENDENCIES_1) $(am__append_3) $(am__DEPENDENCIES_1) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/xz-args.Po ./$(DEPDIR)/xz-coder.Po \ + ./$(DEPDIR)/xz-file_io.Po ./$(DEPDIR)/xz-hardware.Po \ + ./$(DEPDIR)/xz-list.Po ./$(DEPDIR)/xz-main.Po \ + ./$(DEPDIR)/xz-message.Po ./$(DEPDIR)/xz-mytime.Po \ + ./$(DEPDIR)/xz-options.Po ./$(DEPDIR)/xz-signals.Po \ + ./$(DEPDIR)/xz-suffix.Po ./$(DEPDIR)/xz-tuklib_exit.Po \ + ./$(DEPDIR)/xz-tuklib_mbstr_fw.Po \ + ./$(DEPDIR)/xz-tuklib_mbstr_width.Po \ + ./$(DEPDIR)/xz-tuklib_open_stdxxx.Po \ + ./$(DEPDIR)/xz-tuklib_progname.Po ./$(DEPDIR)/xz-util.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(xz_SOURCES) +DIST_SOURCES = $(am__xz_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +man1dir = $(mandir)/man1 +NROFF = nroff +MANS = $(dist_man_MANS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__DIST_COMMON = $(dist_man_MANS) $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_CFLAGS = @AM_CFLAGS@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CAPSICUM_LIB = @CAPSICUM_LIB@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAG_VISIBILITY = @CFLAG_VISIBILITY@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +GETOPT_H = @GETOPT_H@ +GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ +GREP = @GREP@ +HAVE_VISIBILITY = @HAVE_VISIBILITY@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTLLIBS = @INTLLIBS@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_EXEEXT = @LN_EXEEXT@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +MSGMERGE = @MSGMERGE@ +MSGMERGE_FOR_MSGFMT_OPTION = @MSGMERGE_FOR_MSGFMT_OPTION@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POSIX_SHELL = @POSIX_SHELL@ +POSUB = @POSUB@ +PREFERABLY_POSIX_SHELL = @PREFERABLY_POSIX_SHELL@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_CXX = @PTHREAD_CXX@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +RC = @RC@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +USE_NLS = @USE_NLS@ +VERSION = @VERSION@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +ax_pthread_config = @ax_pthread_config@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +enable_path_for_scripts = @enable_path_for_scripts@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +xz = @xz@ +xz_SOURCES = args.c args.h coder.c coder.h file_io.c file_io.h \ + hardware.c hardware.h main.c main.h message.c message.h \ + mytime.c mytime.h options.c options.h private.h signals.c \ + signals.h suffix.c suffix.h util.c util.h \ + ../common/tuklib_open_stdxxx.c ../common/tuklib_progname.c \ + ../common/tuklib_exit.c ../common/tuklib_mbstr_width.c \ + ../common/tuklib_mbstr_fw.c $(am__append_1) $(am__append_2) +xz_CPPFLAGS = \ + -DLOCALEDIR=\"$(localedir)\" \ + -I$(top_srcdir)/src/common \ + -I$(top_srcdir)/src/liblzma/api \ + -I$(top_builddir)/lib + + +# libgnu.a may need these libs, so this must be after libgnu.a. +xz_LDADD = $(top_builddir)/src/liblzma/liblzma.la $(CAPSICUM_LIB) \ + $(am__append_3) $(LTLIBINTL) +dist_man_MANS = xz.1 +xzlinks = unxz xzcat $(am__append_4) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj .rc +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/xz/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/xz/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if test -f $$p \ + || test -f $$p1 \ + ; then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' \ + -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' \ + `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +xz$(EXEEXT): $(xz_OBJECTS) $(xz_DEPENDENCIES) $(EXTRA_xz_DEPENDENCIES) + @rm -f xz$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(xz_OBJECTS) $(xz_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-args.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-coder.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-file_io.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-hardware.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-list.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-main.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-message.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-mytime.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-options.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-signals.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-suffix.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-tuklib_exit.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-tuklib_mbstr_fw.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-tuklib_mbstr_width.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-tuklib_open_stdxxx.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-tuklib_progname.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xz-util.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +xz-args.o: args.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-args.o -MD -MP -MF $(DEPDIR)/xz-args.Tpo -c -o xz-args.o `test -f 'args.c' || echo '$(srcdir)/'`args.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-args.Tpo $(DEPDIR)/xz-args.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='args.c' object='xz-args.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-args.o `test -f 'args.c' || echo '$(srcdir)/'`args.c + +xz-args.obj: args.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-args.obj -MD -MP -MF $(DEPDIR)/xz-args.Tpo -c -o xz-args.obj `if test -f 'args.c'; then $(CYGPATH_W) 'args.c'; else $(CYGPATH_W) '$(srcdir)/args.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-args.Tpo $(DEPDIR)/xz-args.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='args.c' object='xz-args.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-args.obj `if test -f 'args.c'; then $(CYGPATH_W) 'args.c'; else $(CYGPATH_W) '$(srcdir)/args.c'; fi` + +xz-coder.o: coder.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-coder.o -MD -MP -MF $(DEPDIR)/xz-coder.Tpo -c -o xz-coder.o `test -f 'coder.c' || echo '$(srcdir)/'`coder.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-coder.Tpo $(DEPDIR)/xz-coder.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='coder.c' object='xz-coder.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-coder.o `test -f 'coder.c' || echo '$(srcdir)/'`coder.c + +xz-coder.obj: coder.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-coder.obj -MD -MP -MF $(DEPDIR)/xz-coder.Tpo -c -o xz-coder.obj `if test -f 'coder.c'; then $(CYGPATH_W) 'coder.c'; else $(CYGPATH_W) '$(srcdir)/coder.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-coder.Tpo $(DEPDIR)/xz-coder.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='coder.c' object='xz-coder.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-coder.obj `if test -f 'coder.c'; then $(CYGPATH_W) 'coder.c'; else $(CYGPATH_W) '$(srcdir)/coder.c'; fi` + +xz-file_io.o: file_io.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-file_io.o -MD -MP -MF $(DEPDIR)/xz-file_io.Tpo -c -o xz-file_io.o `test -f 'file_io.c' || echo '$(srcdir)/'`file_io.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-file_io.Tpo $(DEPDIR)/xz-file_io.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='file_io.c' object='xz-file_io.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-file_io.o `test -f 'file_io.c' || echo '$(srcdir)/'`file_io.c + +xz-file_io.obj: file_io.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-file_io.obj -MD -MP -MF $(DEPDIR)/xz-file_io.Tpo -c -o xz-file_io.obj `if test -f 'file_io.c'; then $(CYGPATH_W) 'file_io.c'; else $(CYGPATH_W) '$(srcdir)/file_io.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-file_io.Tpo $(DEPDIR)/xz-file_io.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='file_io.c' object='xz-file_io.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-file_io.obj `if test -f 'file_io.c'; then $(CYGPATH_W) 'file_io.c'; else $(CYGPATH_W) '$(srcdir)/file_io.c'; fi` + +xz-hardware.o: hardware.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-hardware.o -MD -MP -MF $(DEPDIR)/xz-hardware.Tpo -c -o xz-hardware.o `test -f 'hardware.c' || echo '$(srcdir)/'`hardware.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-hardware.Tpo $(DEPDIR)/xz-hardware.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='hardware.c' object='xz-hardware.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-hardware.o `test -f 'hardware.c' || echo '$(srcdir)/'`hardware.c + +xz-hardware.obj: hardware.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-hardware.obj -MD -MP -MF $(DEPDIR)/xz-hardware.Tpo -c -o xz-hardware.obj `if test -f 'hardware.c'; then $(CYGPATH_W) 'hardware.c'; else $(CYGPATH_W) '$(srcdir)/hardware.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-hardware.Tpo $(DEPDIR)/xz-hardware.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='hardware.c' object='xz-hardware.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-hardware.obj `if test -f 'hardware.c'; then $(CYGPATH_W) 'hardware.c'; else $(CYGPATH_W) '$(srcdir)/hardware.c'; fi` + +xz-main.o: main.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-main.o -MD -MP -MF $(DEPDIR)/xz-main.Tpo -c -o xz-main.o `test -f 'main.c' || echo '$(srcdir)/'`main.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-main.Tpo $(DEPDIR)/xz-main.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='main.c' object='xz-main.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-main.o `test -f 'main.c' || echo '$(srcdir)/'`main.c + +xz-main.obj: main.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-main.obj -MD -MP -MF $(DEPDIR)/xz-main.Tpo -c -o xz-main.obj `if test -f 'main.c'; then $(CYGPATH_W) 'main.c'; else $(CYGPATH_W) '$(srcdir)/main.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-main.Tpo $(DEPDIR)/xz-main.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='main.c' object='xz-main.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-main.obj `if test -f 'main.c'; then $(CYGPATH_W) 'main.c'; else $(CYGPATH_W) '$(srcdir)/main.c'; fi` + +xz-message.o: message.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-message.o -MD -MP -MF $(DEPDIR)/xz-message.Tpo -c -o xz-message.o `test -f 'message.c' || echo '$(srcdir)/'`message.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-message.Tpo $(DEPDIR)/xz-message.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='message.c' object='xz-message.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-message.o `test -f 'message.c' || echo '$(srcdir)/'`message.c + +xz-message.obj: message.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-message.obj -MD -MP -MF $(DEPDIR)/xz-message.Tpo -c -o xz-message.obj `if test -f 'message.c'; then $(CYGPATH_W) 'message.c'; else $(CYGPATH_W) '$(srcdir)/message.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-message.Tpo $(DEPDIR)/xz-message.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='message.c' object='xz-message.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-message.obj `if test -f 'message.c'; then $(CYGPATH_W) 'message.c'; else $(CYGPATH_W) '$(srcdir)/message.c'; fi` + +xz-mytime.o: mytime.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-mytime.o -MD -MP -MF $(DEPDIR)/xz-mytime.Tpo -c -o xz-mytime.o `test -f 'mytime.c' || echo '$(srcdir)/'`mytime.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-mytime.Tpo $(DEPDIR)/xz-mytime.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mytime.c' object='xz-mytime.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-mytime.o `test -f 'mytime.c' || echo '$(srcdir)/'`mytime.c + +xz-mytime.obj: mytime.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-mytime.obj -MD -MP -MF $(DEPDIR)/xz-mytime.Tpo -c -o xz-mytime.obj `if test -f 'mytime.c'; then $(CYGPATH_W) 'mytime.c'; else $(CYGPATH_W) '$(srcdir)/mytime.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-mytime.Tpo $(DEPDIR)/xz-mytime.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mytime.c' object='xz-mytime.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-mytime.obj `if test -f 'mytime.c'; then $(CYGPATH_W) 'mytime.c'; else $(CYGPATH_W) '$(srcdir)/mytime.c'; fi` + +xz-options.o: options.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-options.o -MD -MP -MF $(DEPDIR)/xz-options.Tpo -c -o xz-options.o `test -f 'options.c' || echo '$(srcdir)/'`options.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-options.Tpo $(DEPDIR)/xz-options.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='options.c' object='xz-options.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-options.o `test -f 'options.c' || echo '$(srcdir)/'`options.c + +xz-options.obj: options.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-options.obj -MD -MP -MF $(DEPDIR)/xz-options.Tpo -c -o xz-options.obj `if test -f 'options.c'; then $(CYGPATH_W) 'options.c'; else $(CYGPATH_W) '$(srcdir)/options.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-options.Tpo $(DEPDIR)/xz-options.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='options.c' object='xz-options.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-options.obj `if test -f 'options.c'; then $(CYGPATH_W) 'options.c'; else $(CYGPATH_W) '$(srcdir)/options.c'; fi` + +xz-signals.o: signals.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-signals.o -MD -MP -MF $(DEPDIR)/xz-signals.Tpo -c -o xz-signals.o `test -f 'signals.c' || echo '$(srcdir)/'`signals.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-signals.Tpo $(DEPDIR)/xz-signals.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='signals.c' object='xz-signals.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-signals.o `test -f 'signals.c' || echo '$(srcdir)/'`signals.c + +xz-signals.obj: signals.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-signals.obj -MD -MP -MF $(DEPDIR)/xz-signals.Tpo -c -o xz-signals.obj `if test -f 'signals.c'; then $(CYGPATH_W) 'signals.c'; else $(CYGPATH_W) '$(srcdir)/signals.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-signals.Tpo $(DEPDIR)/xz-signals.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='signals.c' object='xz-signals.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-signals.obj `if test -f 'signals.c'; then $(CYGPATH_W) 'signals.c'; else $(CYGPATH_W) '$(srcdir)/signals.c'; fi` + +xz-suffix.o: suffix.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-suffix.o -MD -MP -MF $(DEPDIR)/xz-suffix.Tpo -c -o xz-suffix.o `test -f 'suffix.c' || echo '$(srcdir)/'`suffix.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-suffix.Tpo $(DEPDIR)/xz-suffix.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='suffix.c' object='xz-suffix.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-suffix.o `test -f 'suffix.c' || echo '$(srcdir)/'`suffix.c + +xz-suffix.obj: suffix.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-suffix.obj -MD -MP -MF $(DEPDIR)/xz-suffix.Tpo -c -o xz-suffix.obj `if test -f 'suffix.c'; then $(CYGPATH_W) 'suffix.c'; else $(CYGPATH_W) '$(srcdir)/suffix.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-suffix.Tpo $(DEPDIR)/xz-suffix.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='suffix.c' object='xz-suffix.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-suffix.obj `if test -f 'suffix.c'; then $(CYGPATH_W) 'suffix.c'; else $(CYGPATH_W) '$(srcdir)/suffix.c'; fi` + +xz-util.o: util.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-util.o -MD -MP -MF $(DEPDIR)/xz-util.Tpo -c -o xz-util.o `test -f 'util.c' || echo '$(srcdir)/'`util.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-util.Tpo $(DEPDIR)/xz-util.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='util.c' object='xz-util.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-util.o `test -f 'util.c' || echo '$(srcdir)/'`util.c + +xz-util.obj: util.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-util.obj -MD -MP -MF $(DEPDIR)/xz-util.Tpo -c -o xz-util.obj `if test -f 'util.c'; then $(CYGPATH_W) 'util.c'; else $(CYGPATH_W) '$(srcdir)/util.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-util.Tpo $(DEPDIR)/xz-util.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='util.c' object='xz-util.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-util.obj `if test -f 'util.c'; then $(CYGPATH_W) 'util.c'; else $(CYGPATH_W) '$(srcdir)/util.c'; fi` + +xz-tuklib_open_stdxxx.o: ../common/tuklib_open_stdxxx.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-tuklib_open_stdxxx.o -MD -MP -MF $(DEPDIR)/xz-tuklib_open_stdxxx.Tpo -c -o xz-tuklib_open_stdxxx.o `test -f '../common/tuklib_open_stdxxx.c' || echo '$(srcdir)/'`../common/tuklib_open_stdxxx.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-tuklib_open_stdxxx.Tpo $(DEPDIR)/xz-tuklib_open_stdxxx.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../common/tuklib_open_stdxxx.c' object='xz-tuklib_open_stdxxx.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-tuklib_open_stdxxx.o `test -f '../common/tuklib_open_stdxxx.c' || echo '$(srcdir)/'`../common/tuklib_open_stdxxx.c + +xz-tuklib_open_stdxxx.obj: ../common/tuklib_open_stdxxx.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-tuklib_open_stdxxx.obj -MD -MP -MF $(DEPDIR)/xz-tuklib_open_stdxxx.Tpo -c -o xz-tuklib_open_stdxxx.obj `if test -f '../common/tuklib_open_stdxxx.c'; then $(CYGPATH_W) '../common/tuklib_open_stdxxx.c'; else $(CYGPATH_W) '$(srcdir)/../common/tuklib_open_stdxxx.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-tuklib_open_stdxxx.Tpo $(DEPDIR)/xz-tuklib_open_stdxxx.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../common/tuklib_open_stdxxx.c' object='xz-tuklib_open_stdxxx.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-tuklib_open_stdxxx.obj `if test -f '../common/tuklib_open_stdxxx.c'; then $(CYGPATH_W) '../common/tuklib_open_stdxxx.c'; else $(CYGPATH_W) '$(srcdir)/../common/tuklib_open_stdxxx.c'; fi` + +xz-tuklib_progname.o: ../common/tuklib_progname.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-tuklib_progname.o -MD -MP -MF $(DEPDIR)/xz-tuklib_progname.Tpo -c -o xz-tuklib_progname.o `test -f '../common/tuklib_progname.c' || echo '$(srcdir)/'`../common/tuklib_progname.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-tuklib_progname.Tpo $(DEPDIR)/xz-tuklib_progname.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../common/tuklib_progname.c' object='xz-tuklib_progname.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-tuklib_progname.o `test -f '../common/tuklib_progname.c' || echo '$(srcdir)/'`../common/tuklib_progname.c + +xz-tuklib_progname.obj: ../common/tuklib_progname.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-tuklib_progname.obj -MD -MP -MF $(DEPDIR)/xz-tuklib_progname.Tpo -c -o xz-tuklib_progname.obj `if test -f '../common/tuklib_progname.c'; then $(CYGPATH_W) '../common/tuklib_progname.c'; else $(CYGPATH_W) '$(srcdir)/../common/tuklib_progname.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-tuklib_progname.Tpo $(DEPDIR)/xz-tuklib_progname.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../common/tuklib_progname.c' object='xz-tuklib_progname.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-tuklib_progname.obj `if test -f '../common/tuklib_progname.c'; then $(CYGPATH_W) '../common/tuklib_progname.c'; else $(CYGPATH_W) '$(srcdir)/../common/tuklib_progname.c'; fi` + +xz-tuklib_exit.o: ../common/tuklib_exit.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-tuklib_exit.o -MD -MP -MF $(DEPDIR)/xz-tuklib_exit.Tpo -c -o xz-tuklib_exit.o `test -f '../common/tuklib_exit.c' || echo '$(srcdir)/'`../common/tuklib_exit.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-tuklib_exit.Tpo $(DEPDIR)/xz-tuklib_exit.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../common/tuklib_exit.c' object='xz-tuklib_exit.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-tuklib_exit.o `test -f '../common/tuklib_exit.c' || echo '$(srcdir)/'`../common/tuklib_exit.c + +xz-tuklib_exit.obj: ../common/tuklib_exit.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-tuklib_exit.obj -MD -MP -MF $(DEPDIR)/xz-tuklib_exit.Tpo -c -o xz-tuklib_exit.obj `if test -f '../common/tuklib_exit.c'; then $(CYGPATH_W) '../common/tuklib_exit.c'; else $(CYGPATH_W) '$(srcdir)/../common/tuklib_exit.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-tuklib_exit.Tpo $(DEPDIR)/xz-tuklib_exit.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../common/tuklib_exit.c' object='xz-tuklib_exit.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-tuklib_exit.obj `if test -f '../common/tuklib_exit.c'; then $(CYGPATH_W) '../common/tuklib_exit.c'; else $(CYGPATH_W) '$(srcdir)/../common/tuklib_exit.c'; fi` + +xz-tuklib_mbstr_width.o: ../common/tuklib_mbstr_width.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-tuklib_mbstr_width.o -MD -MP -MF $(DEPDIR)/xz-tuklib_mbstr_width.Tpo -c -o xz-tuklib_mbstr_width.o `test -f '../common/tuklib_mbstr_width.c' || echo '$(srcdir)/'`../common/tuklib_mbstr_width.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-tuklib_mbstr_width.Tpo $(DEPDIR)/xz-tuklib_mbstr_width.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../common/tuklib_mbstr_width.c' object='xz-tuklib_mbstr_width.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-tuklib_mbstr_width.o `test -f '../common/tuklib_mbstr_width.c' || echo '$(srcdir)/'`../common/tuklib_mbstr_width.c + +xz-tuklib_mbstr_width.obj: ../common/tuklib_mbstr_width.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-tuklib_mbstr_width.obj -MD -MP -MF $(DEPDIR)/xz-tuklib_mbstr_width.Tpo -c -o xz-tuklib_mbstr_width.obj `if test -f '../common/tuklib_mbstr_width.c'; then $(CYGPATH_W) '../common/tuklib_mbstr_width.c'; else $(CYGPATH_W) '$(srcdir)/../common/tuklib_mbstr_width.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-tuklib_mbstr_width.Tpo $(DEPDIR)/xz-tuklib_mbstr_width.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../common/tuklib_mbstr_width.c' object='xz-tuklib_mbstr_width.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-tuklib_mbstr_width.obj `if test -f '../common/tuklib_mbstr_width.c'; then $(CYGPATH_W) '../common/tuklib_mbstr_width.c'; else $(CYGPATH_W) '$(srcdir)/../common/tuklib_mbstr_width.c'; fi` + +xz-tuklib_mbstr_fw.o: ../common/tuklib_mbstr_fw.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-tuklib_mbstr_fw.o -MD -MP -MF $(DEPDIR)/xz-tuklib_mbstr_fw.Tpo -c -o xz-tuklib_mbstr_fw.o `test -f '../common/tuklib_mbstr_fw.c' || echo '$(srcdir)/'`../common/tuklib_mbstr_fw.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-tuklib_mbstr_fw.Tpo $(DEPDIR)/xz-tuklib_mbstr_fw.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../common/tuklib_mbstr_fw.c' object='xz-tuklib_mbstr_fw.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-tuklib_mbstr_fw.o `test -f '../common/tuklib_mbstr_fw.c' || echo '$(srcdir)/'`../common/tuklib_mbstr_fw.c + +xz-tuklib_mbstr_fw.obj: ../common/tuklib_mbstr_fw.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-tuklib_mbstr_fw.obj -MD -MP -MF $(DEPDIR)/xz-tuklib_mbstr_fw.Tpo -c -o xz-tuklib_mbstr_fw.obj `if test -f '../common/tuklib_mbstr_fw.c'; then $(CYGPATH_W) '../common/tuklib_mbstr_fw.c'; else $(CYGPATH_W) '$(srcdir)/../common/tuklib_mbstr_fw.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-tuklib_mbstr_fw.Tpo $(DEPDIR)/xz-tuklib_mbstr_fw.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='../common/tuklib_mbstr_fw.c' object='xz-tuklib_mbstr_fw.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-tuklib_mbstr_fw.obj `if test -f '../common/tuklib_mbstr_fw.c'; then $(CYGPATH_W) '../common/tuklib_mbstr_fw.c'; else $(CYGPATH_W) '$(srcdir)/../common/tuklib_mbstr_fw.c'; fi` + +xz-list.o: list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-list.o -MD -MP -MF $(DEPDIR)/xz-list.Tpo -c -o xz-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-list.Tpo $(DEPDIR)/xz-list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='xz-list.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c + +xz-list.obj: list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT xz-list.obj -MD -MP -MF $(DEPDIR)/xz-list.Tpo -c -o xz-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/xz-list.Tpo $(DEPDIR)/xz-list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='xz-list.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(xz_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o xz-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-man1: $(dist_man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(dist_man_MANS)'; \ + test -n "$(man1dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.1[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ + done; } + +uninstall-man1: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man1dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.1[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(MANS) +installdirs: + for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/xz-args.Po + -rm -f ./$(DEPDIR)/xz-coder.Po + -rm -f ./$(DEPDIR)/xz-file_io.Po + -rm -f ./$(DEPDIR)/xz-hardware.Po + -rm -f ./$(DEPDIR)/xz-list.Po + -rm -f ./$(DEPDIR)/xz-main.Po + -rm -f ./$(DEPDIR)/xz-message.Po + -rm -f ./$(DEPDIR)/xz-mytime.Po + -rm -f ./$(DEPDIR)/xz-options.Po + -rm -f ./$(DEPDIR)/xz-signals.Po + -rm -f ./$(DEPDIR)/xz-suffix.Po + -rm -f ./$(DEPDIR)/xz-tuklib_exit.Po + -rm -f ./$(DEPDIR)/xz-tuklib_mbstr_fw.Po + -rm -f ./$(DEPDIR)/xz-tuklib_mbstr_width.Po + -rm -f ./$(DEPDIR)/xz-tuklib_open_stdxxx.Po + -rm -f ./$(DEPDIR)/xz-tuklib_progname.Po + -rm -f ./$(DEPDIR)/xz-util.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-man + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-data-hook +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binPROGRAMS + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: install-man1 + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/xz-args.Po + -rm -f ./$(DEPDIR)/xz-coder.Po + -rm -f ./$(DEPDIR)/xz-file_io.Po + -rm -f ./$(DEPDIR)/xz-hardware.Po + -rm -f ./$(DEPDIR)/xz-list.Po + -rm -f ./$(DEPDIR)/xz-main.Po + -rm -f ./$(DEPDIR)/xz-message.Po + -rm -f ./$(DEPDIR)/xz-mytime.Po + -rm -f ./$(DEPDIR)/xz-options.Po + -rm -f ./$(DEPDIR)/xz-signals.Po + -rm -f ./$(DEPDIR)/xz-suffix.Po + -rm -f ./$(DEPDIR)/xz-tuklib_exit.Po + -rm -f ./$(DEPDIR)/xz-tuklib_mbstr_fw.Po + -rm -f ./$(DEPDIR)/xz-tuklib_mbstr_width.Po + -rm -f ./$(DEPDIR)/xz-tuklib_open_stdxxx.Po + -rm -f ./$(DEPDIR)/xz-tuklib_progname.Po + -rm -f ./$(DEPDIR)/xz-util.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS uninstall-man + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) uninstall-hook +uninstall-man: uninstall-man1 + +.MAKE: install-am install-data-am install-exec-am install-strip \ + uninstall-am + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-binPROGRAMS \ + install-data install-data-am install-data-hook install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-man1 install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \ + uninstall-am uninstall-binPROGRAMS uninstall-hook \ + uninstall-man uninstall-man1 + +.PRECIOUS: Makefile + + +# Windows resource compiler support +.rc.o: + $(RC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(xz_CPPFLAGS) $(CPPFLAGS) $(RCFLAGS) -i $< -o $@ + +install-exec-hook: + cd "$(DESTDIR)$(bindir)" && \ + target=`echo xz | sed '$(transform)'`$(EXEEXT) && \ + for name in $(xzlinks); do \ + link=`echo $$name | sed '$(transform)'`$(LN_EXEEXT) && \ + rm -f "$$link" && \ + $(LN_S) "$$target" "$$link"; \ + done + +# The installation of translated man pages abuses Automake internals +# by calling "install-man" with redefined dist_man_MANS and man_MANS. +# If this breaks some day, don't blame Automake developers. +install-data-hook: + languages= ; \ + if test "$(USE_NLS)" = yes && test -d "$(top_srcdir)/po4a/man"; then \ + languages=`ls "$(top_srcdir)/po4a/man"`; \ + fi; \ + target=`echo xz | sed '$(transform)'` && \ + for lang in . $$languages; do \ + man="$(top_srcdir)/po4a/man/$$lang/xz.1" ; \ + if test -f "$$man"; then \ + $(MAKE) dist_man_MANS="$$man" man_MANS= \ + mandir="$(mandir)/$$lang" install-man; \ + fi; \ + man1dir="$(DESTDIR)$(mandir)/$$lang/man1" && \ + if test -f "$$man1dir/$$target.1"; then ( \ + cd "$$man1dir" && \ + for name in $(xzlinks); do \ + link=`echo $$name | sed '$(transform)'` && \ + rm -f "$$link.1" && \ + $(LN_S) "$$target.1" "$$link.1"; \ + done \ + ); fi; \ + done + +uninstall-hook: + cd "$(DESTDIR)$(bindir)" && \ + for name in $(xzlinks); do \ + link=`echo $$name | sed '$(transform)'`$(LN_EXEEXT) && \ + rm -f "$$link"; \ + done + languages= ; \ + if test "$(USE_NLS)" = yes && test -d "$(top_srcdir)/po4a/man"; then \ + languages=`ls "$(top_srcdir)/po4a/man"`; \ + fi; \ + for lang in . $$languages; do \ + for name in xz $(xzlinks); do \ + name=`echo $$name | sed '$(transform)'` && \ + rm -f "$(DESTDIR)$(mandir)/$$lang/man1/$$name.1"; \ + done; \ + done + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/xz/args.c b/src/xz/args.c new file mode 100644 index 0000000..b0a1174 --- /dev/null +++ b/src/xz/args.c @@ -0,0 +1,751 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.c +/// \brief Argument parsing +/// +/// \note Filter-specific options parsing is in options.c. +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include "getopt.h" +#include <ctype.h> + + +bool opt_stdout = false; +bool opt_force = false; +bool opt_keep_original = false; +bool opt_robot = false; +bool opt_ignore_check = false; + +// We don't modify or free() this, but we need to assign it in some +// non-const pointers. +const char stdin_filename[] = "(stdin)"; + + +/// Parse and set the memory usage limit for compression, decompression, +/// and/or multithreaded decompression. +static void +parse_memlimit(const char *name, const char *name_percentage, const char *str, + bool set_compress, bool set_decompress, bool set_mtdec) +{ + bool is_percentage = false; + uint64_t value; + + const size_t len = strlen(str); + if (len > 0 && str[len - 1] == '%') { + // Make a copy so that we can get rid of %. + // + // In the past str wasn't const and we modified it directly + // but that modified argv[] and thus affected what was visible + // in "ps auxf" or similar tools which was confusing. For + // example, --memlimit=50% would show up as --memlimit=50 + // since the percent sign was overwritten here. + char *s = xstrdup(str); + s[len - 1] = '\0'; + is_percentage = true; + value = str_to_uint64(name_percentage, s, 1, 100); + free(s); + } else { + // On 32-bit systems, SIZE_MAX would make more sense than + // UINT64_MAX. But use UINT64_MAX still so that scripts + // that assume > 4 GiB values don't break. + value = str_to_uint64(name, str, 0, UINT64_MAX); + } + + hardware_memlimit_set(value, set_compress, set_decompress, set_mtdec, + is_percentage); + return; +} + + +static void +parse_block_list(const char *str_const) +{ + // We need a modifiable string in the for-loop. + char *str_start = xstrdup(str_const); + char *str = str_start; + + // It must be non-empty and not begin with a comma. + if (str[0] == '\0' || str[0] == ',') + message_fatal(_("%s: Invalid argument to --block-list"), str); + + // Count the number of comma-separated strings. + size_t count = 1; + for (size_t i = 0; str[i] != '\0'; ++i) + if (str[i] == ',') + ++count; + + // Prevent an unlikely integer overflow. + if (count > SIZE_MAX / sizeof(uint64_t) - 1) + message_fatal(_("%s: Too many arguments to --block-list"), + str); + + // Allocate memory to hold all the sizes specified. + // If --block-list was specified already, its value is forgotten. + free(opt_block_list); + opt_block_list = xmalloc((count + 1) * sizeof(uint64_t)); + + for (size_t i = 0; i < count; ++i) { + // Locate the next comma and replace it with \0. + char *p = strchr(str, ','); + if (p != NULL) + *p = '\0'; + + if (str[0] == '\0') { + // There is no string, that is, a comma follows + // another comma. Use the previous value. + // + // NOTE: We checked earlier that the first char + // of the whole list cannot be a comma. + assert(i > 0); + opt_block_list[i] = opt_block_list[i - 1]; + } else { + opt_block_list[i] = str_to_uint64("block-list", str, + 0, UINT64_MAX); + + // Zero indicates no more new Blocks. + if (opt_block_list[i] == 0) { + if (i + 1 != count) + message_fatal(_("0 can only be used " + "as the last element " + "in --block-list")); + + opt_block_list[i] = UINT64_MAX; + } + } + + str = p + 1; + } + + // Terminate the array. + opt_block_list[count] = 0; + + free(str_start); + return; +} + + +static void +parse_real(args_info *args, int argc, char **argv) +{ + enum { + OPT_X86 = INT_MIN, + OPT_POWERPC, + OPT_IA64, + OPT_ARM, + OPT_ARMTHUMB, + OPT_ARM64, + OPT_SPARC, + OPT_DELTA, + OPT_LZMA1, + OPT_LZMA2, + + OPT_SINGLE_STREAM, + OPT_NO_SPARSE, + OPT_FILES, + OPT_FILES0, + OPT_BLOCK_SIZE, + OPT_BLOCK_LIST, + OPT_MEM_COMPRESS, + OPT_MEM_DECOMPRESS, + OPT_MEM_MT_DECOMPRESS, + OPT_NO_ADJUST, + OPT_INFO_MEMORY, + OPT_ROBOT, + OPT_FLUSH_TIMEOUT, + OPT_IGNORE_CHECK, + }; + + static const char short_opts[] + = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; + + static const struct option long_opts[] = { + // Operation mode + { "compress", no_argument, NULL, 'z' }, + { "decompress", no_argument, NULL, 'd' }, + { "uncompress", no_argument, NULL, 'd' }, + { "test", no_argument, NULL, 't' }, + { "list", no_argument, NULL, 'l' }, + + // Operation modifiers + { "keep", no_argument, NULL, 'k' }, + { "force", no_argument, NULL, 'f' }, + { "stdout", no_argument, NULL, 'c' }, + { "to-stdout", no_argument, NULL, 'c' }, + { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM }, + { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, + { "suffix", required_argument, NULL, 'S' }, + // { "recursive", no_argument, NULL, 'r' }, // TODO + { "files", optional_argument, NULL, OPT_FILES }, + { "files0", optional_argument, NULL, OPT_FILES0 }, + + // Basic compression settings + { "format", required_argument, NULL, 'F' }, + { "check", required_argument, NULL, 'C' }, + { "ignore-check", no_argument, NULL, OPT_IGNORE_CHECK }, + { "block-size", required_argument, NULL, OPT_BLOCK_SIZE }, + { "block-list", required_argument, NULL, OPT_BLOCK_LIST }, + { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS }, + { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS }, + { "memlimit-mt-decompress", required_argument, NULL, OPT_MEM_MT_DECOMPRESS }, + { "memlimit", required_argument, NULL, 'M' }, + { "memory", required_argument, NULL, 'M' }, // Old alias + { "no-adjust", no_argument, NULL, OPT_NO_ADJUST }, + { "threads", required_argument, NULL, 'T' }, + { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT }, + + { "extreme", no_argument, NULL, 'e' }, + { "fast", no_argument, NULL, '0' }, + { "best", no_argument, NULL, '9' }, + + // Filters + { "lzma1", optional_argument, NULL, OPT_LZMA1 }, + { "lzma2", optional_argument, NULL, OPT_LZMA2 }, + { "x86", optional_argument, NULL, OPT_X86 }, + { "powerpc", optional_argument, NULL, OPT_POWERPC }, + { "ia64", optional_argument, NULL, OPT_IA64 }, + { "arm", optional_argument, NULL, OPT_ARM }, + { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, + { "arm64", optional_argument, NULL, OPT_ARM64 }, + { "sparc", optional_argument, NULL, OPT_SPARC }, + { "delta", optional_argument, NULL, OPT_DELTA }, + + // Other options + { "quiet", no_argument, NULL, 'q' }, + { "verbose", no_argument, NULL, 'v' }, + { "no-warn", no_argument, NULL, 'Q' }, + { "robot", no_argument, NULL, OPT_ROBOT }, + { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, + { "help", no_argument, NULL, 'h' }, + { "long-help", no_argument, NULL, 'H' }, + { "version", no_argument, NULL, 'V' }, + + { NULL, 0, NULL, 0 } + }; + + int c; + + while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) + != -1) { + switch (c) { + // Compression preset (also for decompression if --format=raw) + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + coder_set_preset((uint32_t)(c - '0')); + break; + + // --memlimit-compress + case OPT_MEM_COMPRESS: + parse_memlimit("memlimit-compress", + "memlimit-compress%", optarg, + true, false, false); + break; + + // --memlimit-decompress + case OPT_MEM_DECOMPRESS: + parse_memlimit("memlimit-decompress", + "memlimit-decompress%", optarg, + false, true, false); + break; + + // --memlimit-mt-decompress + case OPT_MEM_MT_DECOMPRESS: + parse_memlimit("memlimit-mt-decompress", + "memlimit-mt-decompress%", optarg, + false, false, true); + break; + + // --memlimit + case 'M': + parse_memlimit("memlimit", "memlimit%", optarg, + true, true, true); + break; + + // --suffix + case 'S': + suffix_set(optarg); + break; + + case 'T': { + // Since xz 5.4.0: Ignore leading '+' first. + const char *s = optarg; + if (optarg[0] == '+') + ++s; + + // The max is from src/liblzma/common/common.h. + uint32_t t = str_to_uint64("threads", s, 0, 16384); + + // If leading '+' was used then use multi-threaded + // mode even if exactly one thread was specified. + if (t == 1 && optarg[0] == '+') + t = UINT32_MAX; + + hardware_threads_set(t); + break; + } + + // --version + case 'V': + // This doesn't return. + message_version(); + + // --stdout + case 'c': + opt_stdout = true; + break; + + // --decompress + case 'd': + opt_mode = MODE_DECOMPRESS; + break; + + // --extreme + case 'e': + coder_set_extreme(); + break; + + // --force + case 'f': + opt_force = true; + break; + + // --info-memory + case OPT_INFO_MEMORY: + // This doesn't return. + hardware_memlimit_show(); + + // --help + case 'h': + // This doesn't return. + message_help(false); + + // --long-help + case 'H': + // This doesn't return. + message_help(true); + + // --list + case 'l': + opt_mode = MODE_LIST; + break; + + // --keep + case 'k': + opt_keep_original = true; + break; + + // --quiet + case 'q': + message_verbosity_decrease(); + break; + + case 'Q': + set_exit_no_warn(); + break; + + case 't': + opt_mode = MODE_TEST; + break; + + // --verbose + case 'v': + message_verbosity_increase(); + break; + + // --robot + case OPT_ROBOT: + opt_robot = true; + + // This is to make sure that floating point numbers + // always have a dot as decimal separator. + setlocale(LC_NUMERIC, "C"); + break; + + case 'z': + opt_mode = MODE_COMPRESS; + break; + + // Filter setup + + case OPT_X86: + coder_add_filter(LZMA_FILTER_X86, + options_bcj(optarg)); + break; + + case OPT_POWERPC: + coder_add_filter(LZMA_FILTER_POWERPC, + options_bcj(optarg)); + break; + + case OPT_IA64: + coder_add_filter(LZMA_FILTER_IA64, + options_bcj(optarg)); + break; + + case OPT_ARM: + coder_add_filter(LZMA_FILTER_ARM, + options_bcj(optarg)); + break; + + case OPT_ARMTHUMB: + coder_add_filter(LZMA_FILTER_ARMTHUMB, + options_bcj(optarg)); + break; + + case OPT_ARM64: + coder_add_filter(LZMA_FILTER_ARM64, + options_bcj(optarg)); + break; + + case OPT_SPARC: + coder_add_filter(LZMA_FILTER_SPARC, + options_bcj(optarg)); + break; + + case OPT_DELTA: + coder_add_filter(LZMA_FILTER_DELTA, + options_delta(optarg)); + break; + + case OPT_LZMA1: + coder_add_filter(LZMA_FILTER_LZMA1, + options_lzma(optarg)); + break; + + case OPT_LZMA2: + coder_add_filter(LZMA_FILTER_LZMA2, + options_lzma(optarg)); + break; + + // Other + + // --format + case 'F': { + // Just in case, support both "lzma" and "alone" since + // the latter was used for forward compatibility in + // LZMA Utils 4.32.x. + static const struct { + char str[8]; + enum format_type format; + } types[] = { + { "auto", FORMAT_AUTO }, + { "xz", FORMAT_XZ }, + { "lzma", FORMAT_LZMA }, + { "alone", FORMAT_LZMA }, +#ifdef HAVE_LZIP_DECODER + { "lzip", FORMAT_LZIP }, +#endif + { "raw", FORMAT_RAW }, + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unknown file " + "format type"), + optarg); + + opt_format = types[i].format; + break; + } + + // --check + case 'C': { + static const struct { + char str[8]; + lzma_check check; + } types[] = { + { "none", LZMA_CHECK_NONE }, + { "crc32", LZMA_CHECK_CRC32 }, + { "crc64", LZMA_CHECK_CRC64 }, + { "sha256", LZMA_CHECK_SHA256 }, + }; + + size_t i = 0; + while (strcmp(types[i].str, optarg) != 0) { + if (++i == ARRAY_SIZE(types)) + message_fatal(_("%s: Unsupported " + "integrity " + "check type"), optarg); + } + + // Use a separate check in case we are using different + // liblzma than what was used to compile us. + if (!lzma_check_is_supported(types[i].check)) + message_fatal(_("%s: Unsupported integrity " + "check type"), optarg); + + coder_set_check(types[i].check); + break; + } + + case OPT_IGNORE_CHECK: + opt_ignore_check = true; + break; + + case OPT_BLOCK_SIZE: + opt_block_size = str_to_uint64("block-size", optarg, + 0, LZMA_VLI_MAX); + break; + + case OPT_BLOCK_LIST: { + parse_block_list(optarg); + break; + } + + case OPT_SINGLE_STREAM: + opt_single_stream = true; + break; + + case OPT_NO_SPARSE: + io_no_sparse(); + break; + + case OPT_FILES: + args->files_delim = '\n'; + + // Fall through + + case OPT_FILES0: + if (args->files_name != NULL) + message_fatal(_("Only one file can be " + "specified with `--files' " + "or `--files0'.")); + + if (optarg == NULL) { + args->files_name = stdin_filename; + args->files_file = stdin; + } else { + args->files_name = optarg; + args->files_file = fopen(optarg, + c == OPT_FILES ? "r" : "rb"); + if (args->files_file == NULL) + message_fatal("%s: %s", optarg, + strerror(errno)); + } + + break; + + case OPT_NO_ADJUST: + opt_auto_adjust = false; + break; + + case OPT_FLUSH_TIMEOUT: + opt_flush_timeout = str_to_uint64("flush-timeout", + optarg, 0, UINT64_MAX); + break; + + default: + message_try_help(); + tuklib_exit(E_ERROR, E_ERROR, false); + } + } + + return; +} + + +static void +parse_environment(args_info *args, char *argv0, const char *varname) +{ + char *env = getenv(varname); + if (env == NULL) + return; + + // We modify the string, so make a copy of it. + env = xstrdup(env); + + // Calculate the number of arguments in env. argc stats at one + // to include space for the program name. + int argc = 1; + bool prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + // NOTE: Cast to unsigned char is needed so that correct + // value gets passed to isspace(), which expects + // unsigned char cast to int. Casting to int is done + // automatically due to integer promotion, but we need to + // force char to unsigned char manually. Otherwise 8-bit + // characters would get promoted to wrong value if + // char is signed. + if (isspace((unsigned char)env[i])) { + prev_was_space = true; + } else if (prev_was_space) { + prev_was_space = false; + + // Keep argc small enough to fit into a signed int + // and to keep it usable for memory allocation. + if (++argc == my_min( + INT_MAX, SIZE_MAX / sizeof(char *))) + message_fatal(_("The environment variable " + "%s contains too many " + "arguments"), varname); + } + } + + // Allocate memory to hold pointers to the arguments. Add one to get + // space for the terminating NULL (if some systems happen to need it). + char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); + argv[0] = argv0; + argv[argc] = NULL; + + // Go through the string again. Split the arguments using '\0' + // characters and add pointers to the resulting strings to argv. + argc = 1; + prev_was_space = true; + for (size_t i = 0; env[i] != '\0'; ++i) { + if (isspace((unsigned char)env[i])) { + prev_was_space = true; + env[i] = '\0'; + } else if (prev_was_space) { + prev_was_space = false; + argv[argc++] = env + i; + } + } + + // Parse the argument list we got from the environment. All non-option + // arguments i.e. filenames are ignored. + parse_real(args, argc, argv); + + // Reset the state of the getopt_long() so that we can parse the + // command line options too. There are two incompatible ways to + // do it. +#ifdef HAVE_OPTRESET + // BSD + optind = 1; + optreset = 1; +#else + // GNU, Solaris + optind = 0; +#endif + + // We don't need the argument list from environment anymore. + free(argv); + free(env); + + return; +} + + +extern void +args_parse(args_info *args, int argc, char **argv) +{ + // Initialize those parts of *args that we need later. + args->files_name = NULL; + args->files_file = NULL; + args->files_delim = '\0'; + + // Check how we were called. + { + // Remove the leading path name, if any. + const char *name = strrchr(argv[0], '/'); + if (name == NULL) + name = argv[0]; + else + ++name; + + // NOTE: It's possible that name[0] is now '\0' if argv[0] + // is weird, but it doesn't matter here. + + // Look for full command names instead of substrings like + // "un", "cat", and "lz" to reduce possibility of false + // positives when the programs have been renamed. + if (strstr(name, "xzcat") != NULL) { + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "unxz") != NULL) { + opt_mode = MODE_DECOMPRESS; + } else if (strstr(name, "lzcat") != NULL) { + opt_format = FORMAT_LZMA; + opt_mode = MODE_DECOMPRESS; + opt_stdout = true; + } else if (strstr(name, "unlzma") != NULL) { + opt_format = FORMAT_LZMA; + opt_mode = MODE_DECOMPRESS; + } else if (strstr(name, "lzma") != NULL) { + opt_format = FORMAT_LZMA; + } + } + + // First the flags from the environment + parse_environment(args, argv[0], "XZ_DEFAULTS"); + parse_environment(args, argv[0], "XZ_OPT"); + + // Then from the command line + parse_real(args, argc, argv); + + // If encoder or decoder support was omitted at build time, + // show an error now so that the rest of the code can rely on + // that whatever is in opt_mode is also supported. +#ifndef HAVE_ENCODERS + if (opt_mode == MODE_COMPRESS) + message_fatal(_("Compression support was disabled " + "at build time")); +#endif +#ifndef HAVE_DECODERS + // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS + // is the only valid choice. + if (opt_mode != MODE_COMPRESS) + message_fatal(_("Decompression support was disabled " + "at build time")); +#endif + +#ifdef HAVE_LZIP_DECODER + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_LZIP) + message_fatal(_("Compression of lzip files (.lz) " + "is not supported")); +#endif + + // Never remove the source file when the destination is not on disk. + // In test mode the data is written nowhere, but setting opt_stdout + // will make the rest of the code behave well. + if (opt_stdout || opt_mode == MODE_TEST) { + opt_keep_original = true; + opt_stdout = true; + } + + // When compressing, if no --format flag was used, or it + // was --format=auto, we compress to the .xz format. + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) + opt_format = FORMAT_XZ; + + // Compression settings need to be validated (options themselves and + // their memory usage) when compressing to any file format. It has to + // be done also when uncompressing raw data, since for raw decoding + // the options given on the command line are used to know what kind + // of raw data we are supposed to decode. + if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) + coder_set_compression_settings(); + + // If no filenames are given, use stdin. + if (argv[optind] == NULL && args->files_name == NULL) { + // We don't modify or free() the "-" constant. The caller + // modifies this so don't make the struct itself const. + static char *names_stdin[2] = { (char *)"-", NULL }; + args->arg_names = names_stdin; + args->arg_count = 1; + } else { + // We got at least one filename from the command line, or + // --files or --files0 was specified. + args->arg_names = argv + optind; + args->arg_count = (unsigned int)(argc - optind); + } + + return; +} + + +#ifndef NDEBUG +extern void +args_free(void) +{ + free(opt_block_list); + return; +} +#endif diff --git a/src/xz/args.h b/src/xz/args.h new file mode 100644 index 0000000..a1a5930 --- /dev/null +++ b/src/xz/args.h @@ -0,0 +1,44 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file args.h +/// \brief Argument parsing +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +typedef struct { + /// Filenames from command line + char **arg_names; + + /// Number of filenames from command line + unsigned int arg_count; + + /// Name of the file from which to read filenames. This is NULL + /// if --files or --files0 was not used. + const char *files_name; + + /// File opened for reading from which filenames are read. This is + /// non-NULL only if files_name is non-NULL. + FILE *files_file; + + /// Delimiter for filenames read from files_file + char files_delim; + +} args_info; + + +extern bool opt_stdout; +extern bool opt_force; +extern bool opt_keep_original; +// extern bool opt_recursive; +extern bool opt_robot; +extern bool opt_ignore_check; + +extern const char stdin_filename[]; + +extern void args_parse(args_info *args, int argc, char **argv); +extern void args_free(void); diff --git a/src/xz/coder.c b/src/xz/coder.c new file mode 100644 index 0000000..05f2288 --- /dev/null +++ b/src/xz/coder.c @@ -0,0 +1,1108 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file coder.c +/// \brief Compresses or uncompresses a file +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/// Return value type for coder_init(). +enum coder_init_ret { + CODER_INIT_NORMAL, + CODER_INIT_PASSTHRU, + CODER_INIT_ERROR, +}; + + +enum operation_mode opt_mode = MODE_COMPRESS; +enum format_type opt_format = FORMAT_AUTO; +bool opt_auto_adjust = true; +bool opt_single_stream = false; +uint64_t opt_block_size = 0; +uint64_t *opt_block_list = NULL; + + +/// Stream used to communicate with liblzma +static lzma_stream strm = LZMA_STREAM_INIT; + +/// Filters needed for all encoding all formats, and also decoding in raw data +static lzma_filter filters[LZMA_FILTERS_MAX + 1]; + +/// Input and output buffers +static io_buf in_buf; +static io_buf out_buf; + +/// Number of filters. Zero indicates that we are using a preset. +static uint32_t filters_count = 0; + +/// Number of the preset (0-9) +static uint32_t preset_number = LZMA_PRESET_DEFAULT; + +/// Integrity check type +static lzma_check check; + +/// This becomes false if the --check=CHECK option is used. +static bool check_default = true; + +/// Indicates if unconsumed input is allowed to remain after +/// decoding has successfully finished. This is set for each file +/// in coder_init(). +static bool allow_trailing_input; + +#ifdef MYTHREAD_ENABLED +static lzma_mt mt_options = { + .flags = 0, + .timeout = 300, + .filters = filters, +}; +#endif + + +extern void +coder_set_check(lzma_check new_check) +{ + check = new_check; + check_default = false; + return; +} + + +static void +forget_filter_chain(void) +{ + // Setting a preset makes us forget a possibly defined custom + // filter chain. + while (filters_count > 0) { + --filters_count; + free(filters[filters_count].options); + filters[filters_count].options = NULL; + } + + return; +} + + +extern void +coder_set_preset(uint32_t new_preset) +{ + preset_number &= ~LZMA_PRESET_LEVEL_MASK; + preset_number |= new_preset; + forget_filter_chain(); + return; +} + + +extern void +coder_set_extreme(void) +{ + preset_number |= LZMA_PRESET_EXTREME; + forget_filter_chain(); + return; +} + + +extern void +coder_add_filter(lzma_vli id, void *options) +{ + if (filters_count == LZMA_FILTERS_MAX) + message_fatal(_("Maximum number of filters is four")); + + filters[filters_count].id = id; + filters[filters_count].options = options; + ++filters_count; + + // Setting a custom filter chain makes us forget the preset options. + // This makes a difference if one specifies e.g. "xz -9 --lzma2 -e" + // where the custom filter chain resets the preset level back to + // the default 6, making the example equivalent to "xz -6e". + preset_number = LZMA_PRESET_DEFAULT; + + return; +} + + +static void lzma_attribute((__noreturn__)) +memlimit_too_small(uint64_t memory_usage) +{ + message(V_ERROR, _("Memory usage limit is too low for the given " + "filter setup.")); + message_mem_needed(V_ERROR, memory_usage); + tuklib_exit(E_ERROR, E_ERROR, false); +} + + +extern void +coder_set_compression_settings(void) +{ +#ifdef HAVE_LZIP_DECODER + // .lz compression isn't supported. + assert(opt_format != FORMAT_LZIP); +#endif + + // The default check type is CRC64, but fallback to CRC32 + // if CRC64 isn't supported by the copy of liblzma we are + // using. CRC32 is always supported. + if (check_default) { + check = LZMA_CHECK_CRC64; + if (!lzma_check_is_supported(check)) + check = LZMA_CHECK_CRC32; + } + + // Options for LZMA1 or LZMA2 in case we are using a preset. + static lzma_options_lzma opt_lzma; + + if (filters_count == 0) { + // We are using a preset. This is not a good idea in raw mode + // except when playing around with things. Different versions + // of this software may use different options in presets, and + // thus make uncompressing the raw data difficult. + if (opt_format == FORMAT_RAW) { + // The message is shown only if warnings are allowed + // but the exit status isn't changed. + message(V_WARNING, _("Using a preset in raw mode " + "is discouraged.")); + message(V_WARNING, _("The exact options of the " + "presets may vary between software " + "versions.")); + } + + // Get the preset for LZMA1 or LZMA2. + if (lzma_lzma_preset(&opt_lzma, preset_number)) + message_bug(); + + // Use LZMA2 except with --format=lzma we use LZMA1. + filters[0].id = opt_format == FORMAT_LZMA + ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; + filters[0].options = &opt_lzma; + filters_count = 1; + } + + // Terminate the filter options array. + filters[filters_count].id = LZMA_VLI_UNKNOWN; + + // If we are using the .lzma format, allow exactly one filter + // which has to be LZMA1. + if (opt_format == FORMAT_LZMA && (filters_count != 1 + || filters[0].id != LZMA_FILTER_LZMA1)) + message_fatal(_("The .lzma format supports only " + "the LZMA1 filter")); + + // If we are using the .xz format, make sure that there is no LZMA1 + // filter to prevent LZMA_PROG_ERROR. + if (opt_format == FORMAT_XZ) + for (size_t i = 0; i < filters_count; ++i) + if (filters[i].id == LZMA_FILTER_LZMA1) + message_fatal(_("LZMA1 cannot be used " + "with the .xz format")); + + // Print the selected filter chain. + message_filters_show(V_DEBUG, filters); + + // The --flush-timeout option requires LZMA_SYNC_FLUSH support + // from the filter chain. Currently threaded encoder doesn't support + // LZMA_SYNC_FLUSH so single-threaded mode must be used. + if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) { + for (size_t i = 0; i < filters_count; ++i) { + switch (filters[i].id) { + case LZMA_FILTER_LZMA2: + case LZMA_FILTER_DELTA: + break; + + default: + message_fatal(_("The filter chain is " + "incompatible with --flush-timeout")); + } + } + + if (hardware_threads_is_mt()) { + message(V_WARNING, _("Switching to single-threaded " + "mode due to --flush-timeout")); + hardware_threads_set(1); + } + } + + // Get the memory usage. Note that if --format=raw was used, + // we can be decompressing. + // + // If multithreaded .xz compression is done, this value will be + // replaced. + uint64_t memory_limit = hardware_memlimit_get(opt_mode); + uint64_t memory_usage = UINT64_MAX; + if (opt_mode == MODE_COMPRESS) { +#ifdef HAVE_ENCODERS +# ifdef MYTHREAD_ENABLED + if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) { + memory_limit = hardware_memlimit_mtenc_get(); + mt_options.threads = hardware_threads_get(); + mt_options.block_size = opt_block_size; + mt_options.check = check; + memory_usage = lzma_stream_encoder_mt_memusage( + &mt_options); + if (memory_usage != UINT64_MAX) + message(V_DEBUG, _("Using up to %" PRIu32 + " threads."), + mt_options.threads); + } else +# endif + { + memory_usage = lzma_raw_encoder_memusage(filters); + } +#endif + } else { +#ifdef HAVE_DECODERS + memory_usage = lzma_raw_decoder_memusage(filters); +#endif + } + + if (memory_usage == UINT64_MAX) + message_fatal(_("Unsupported filter chain or filter options")); + + // Print memory usage info before possible dictionary + // size auto-adjusting. + // + // NOTE: If only encoder support was built, we cannot show the + // what the decoder memory usage will be. + message_mem_needed(V_DEBUG, memory_usage); +#ifdef HAVE_DECODERS + if (opt_mode == MODE_COMPRESS) { + const uint64_t decmem = lzma_raw_decoder_memusage(filters); + if (decmem != UINT64_MAX) + message(V_DEBUG, _("Decompression will need " + "%s MiB of memory."), uint64_to_str( + round_up_to_mib(decmem), 0)); + } +#endif + + if (memory_usage <= memory_limit) + return; + + // With --format=raw settings are never adjusted to meet + // the memory usage limit. + if (opt_format == FORMAT_RAW) + memlimit_too_small(memory_usage); + + assert(opt_mode == MODE_COMPRESS); + +#ifdef HAVE_ENCODERS +# ifdef MYTHREAD_ENABLED + if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) { + // Try to reduce the number of threads before + // adjusting the compression settings down. + while (mt_options.threads > 1) { + // Reduce the number of threads by one and check + // the memory usage. + --mt_options.threads; + memory_usage = lzma_stream_encoder_mt_memusage( + &mt_options); + if (memory_usage == UINT64_MAX) + message_bug(); + + if (memory_usage <= memory_limit) { + // The memory usage is now low enough. + message(V_WARNING, _("Reduced the number of " + "threads from %s to %s to not exceed " + "the memory usage limit of %s MiB"), + uint64_to_str( + hardware_threads_get(), 0), + uint64_to_str(mt_options.threads, 1), + uint64_to_str(round_up_to_mib( + memory_limit), 2)); + return; + } + } + + // If the memory usage limit is only a soft limit (automatic + // number of threads and no --memlimit-compress), the limit + // is only used to reduce the number of threads and once at + // just one thread, the limit is completely ignored. This + // way -T0 won't use insane amount of memory but at the same + // time the soft limit will never make xz fail and never make + // xz change settings that would affect the compressed output. + if (hardware_memlimit_mtenc_is_default()) { + message(V_WARNING, _("Reduced the number of threads " + "from %s to one. The automatic memory usage " + "limit of %s MiB is still being exceeded. " + "%s MiB of memory is required. " + "Continuing anyway."), + uint64_to_str(hardware_threads_get(), 0), + uint64_to_str( + round_up_to_mib(memory_limit), 1), + uint64_to_str( + round_up_to_mib(memory_usage), 2)); + return; + } + + // If --no-adjust was used, we cannot drop to single-threaded + // mode since it produces different compressed output. + // + // NOTE: In xz 5.2.x, --no-adjust also prevented reducing + // the number of threads. This changed in 5.3.3alpha. + if (!opt_auto_adjust) + memlimit_too_small(memory_usage); + + // Switch to single-threaded mode. It uses + // less memory than using one thread in + // the multithreaded mode but the output + // is also different. + hardware_threads_set(1); + memory_usage = lzma_raw_encoder_memusage(filters); + message(V_WARNING, _("Switching to single-threaded mode " + "to not exceed the memory usage limit of %s MiB"), + uint64_to_str(round_up_to_mib(memory_limit), 0)); + } +# endif + + if (memory_usage <= memory_limit) + return; + + // Don't adjust LZMA2 or LZMA1 dictionary size if --no-adjust + // was specified as that would change the compressed output. + if (!opt_auto_adjust) + memlimit_too_small(memory_usage); + + // Look for the last filter if it is LZMA2 or LZMA1, so we can make + // it use less RAM. With other filters we don't know what to do. + size_t i = 0; + while (filters[i].id != LZMA_FILTER_LZMA2 + && filters[i].id != LZMA_FILTER_LZMA1) { + if (filters[i].id == LZMA_VLI_UNKNOWN) + memlimit_too_small(memory_usage); + + ++i; + } + + // Decrease the dictionary size until we meet the memory + // usage limit. First round down to full mebibytes. + lzma_options_lzma *opt = filters[i].options; + const uint32_t orig_dict_size = opt->dict_size; + opt->dict_size &= ~((UINT32_C(1) << 20) - 1); + while (true) { + // If it is below 1 MiB, auto-adjusting failed. We could be + // more sophisticated and scale it down even more, but let's + // see if many complain about this version. + // + // FIXME: Displays the scaled memory usage instead + // of the original. + if (opt->dict_size < (UINT32_C(1) << 20)) + memlimit_too_small(memory_usage); + + memory_usage = lzma_raw_encoder_memusage(filters); + if (memory_usage == UINT64_MAX) + message_bug(); + + // Accept it if it is low enough. + if (memory_usage <= memory_limit) + break; + + // Otherwise 1 MiB down and try again. I hope this + // isn't too slow method for cases where the original + // dict_size is very big. + opt->dict_size -= UINT32_C(1) << 20; + } + + // Tell the user that we decreased the dictionary size. + message(V_WARNING, _("Adjusted LZMA%c dictionary size " + "from %s MiB to %s MiB to not exceed " + "the memory usage limit of %s MiB"), + filters[i].id == LZMA_FILTER_LZMA2 + ? '2' : '1', + uint64_to_str(orig_dict_size >> 20, 0), + uint64_to_str(opt->dict_size >> 20, 1), + uint64_to_str(round_up_to_mib(memory_limit), 2)); +#endif + + return; +} + + +#ifdef HAVE_DECODERS +/// Return true if the data in in_buf seems to be in the .xz format. +static bool +is_format_xz(void) +{ + // Specify the magic as hex to be compatible with EBCDIC systems. + static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 }; + return strm.avail_in >= sizeof(magic) + && memcmp(in_buf.u8, magic, sizeof(magic)) == 0; +} + + +/// Return true if the data in in_buf seems to be in the .lzma format. +static bool +is_format_lzma(void) +{ + // The .lzma header is 13 bytes. + if (strm.avail_in < 13) + return false; + + // Decode the LZMA1 properties. + lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; + if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK) + return false; + + // A hack to ditch tons of false positives: We allow only dictionary + // sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone + // created only files with 2^n, but accepts any dictionary size. + // If someone complains, this will be reconsidered. + lzma_options_lzma *opt = filter.options; + const uint32_t dict_size = opt->dict_size; + free(opt); + + if (dict_size != UINT32_MAX) { + uint32_t d = dict_size - 1; + d |= d >> 2; + d |= d >> 3; + d |= d >> 4; + d |= d >> 8; + d |= d >> 16; + ++d; + if (d != dict_size || dict_size == 0) + return false; + } + + // Another hack to ditch false positives: Assume that if the + // uncompressed size is known, it must be less than 256 GiB. + // Again, if someone complains, this will be reconsidered. + uint64_t uncompressed_size = 0; + for (size_t i = 0; i < 8; ++i) + uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8); + + if (uncompressed_size != UINT64_MAX + && uncompressed_size > (UINT64_C(1) << 38)) + return false; + + return true; +} + + +#ifdef HAVE_LZIP_DECODER +/// Return true if the data in in_buf seems to be in the .lz format. +static bool +is_format_lzip(void) +{ + static const uint8_t magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; + return strm.avail_in >= sizeof(magic) + && memcmp(in_buf.u8, magic, sizeof(magic)) == 0; +} +#endif +#endif + + +/// Detect the input file type (for now, this done only when decompressing), +/// and initialize an appropriate coder. Return value indicates if a normal +/// liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru +/// mode should be used (CODER_INIT_PASSTHRU), or if an error occurred +/// (CODER_INIT_ERROR). +static enum coder_init_ret +coder_init(file_pair *pair) +{ + lzma_ret ret = LZMA_PROG_ERROR; + + // In most cases if there is input left when coding finishes, + // something has gone wrong. Exceptions are --single-stream + // and decoding .lz files which can contain trailing non-.lz data. + // These will be handled later in this function. + allow_trailing_input = false; + + if (opt_mode == MODE_COMPRESS) { +#ifdef HAVE_ENCODERS + switch (opt_format) { + case FORMAT_AUTO: + // args.c ensures this. + assert(0); + break; + + case FORMAT_XZ: +# ifdef MYTHREAD_ENABLED + if (hardware_threads_is_mt()) + ret = lzma_stream_encoder_mt( + &strm, &mt_options); + else +# endif + ret = lzma_stream_encoder( + &strm, filters, check); + break; + + case FORMAT_LZMA: + ret = lzma_alone_encoder(&strm, filters[0].options); + break; + +# ifdef HAVE_LZIP_DECODER + case FORMAT_LZIP: + // args.c should disallow this. + assert(0); + ret = LZMA_PROG_ERROR; + break; +# endif + + case FORMAT_RAW: + ret = lzma_raw_encoder(&strm, filters); + break; + } +#endif + } else { +#ifdef HAVE_DECODERS + uint32_t flags = 0; + + // It seems silly to warn about unsupported check if the + // check won't be verified anyway due to --ignore-check. + if (opt_ignore_check) + flags |= LZMA_IGNORE_CHECK; + else + flags |= LZMA_TELL_UNSUPPORTED_CHECK; + + if (opt_single_stream) + allow_trailing_input = true; + else + flags |= LZMA_CONCATENATED; + + // We abuse FORMAT_AUTO to indicate unknown file format, + // for which we may consider passthru mode. + enum format_type init_format = FORMAT_AUTO; + + switch (opt_format) { + case FORMAT_AUTO: + // .lz is checked before .lzma since .lzma detection + // is more complicated (no magic bytes). + if (is_format_xz()) + init_format = FORMAT_XZ; +# ifdef HAVE_LZIP_DECODER + else if (is_format_lzip()) + init_format = FORMAT_LZIP; +# endif + else if (is_format_lzma()) + init_format = FORMAT_LZMA; + break; + + case FORMAT_XZ: + if (is_format_xz()) + init_format = FORMAT_XZ; + break; + + case FORMAT_LZMA: + if (is_format_lzma()) + init_format = FORMAT_LZMA; + break; + +# ifdef HAVE_LZIP_DECODER + case FORMAT_LZIP: + if (is_format_lzip()) + init_format = FORMAT_LZIP; + break; +# endif + + case FORMAT_RAW: + init_format = FORMAT_RAW; + break; + } + + switch (init_format) { + case FORMAT_AUTO: + // Unknown file format. If --decompress --stdout + // --force have been given, then we copy the input + // as is to stdout. Checking for MODE_DECOMPRESS + // is needed, because we don't want to do use + // passthru mode with --test. + if (opt_mode == MODE_DECOMPRESS + && opt_stdout && opt_force) { + // These are needed for progress info. + strm.total_in = 0; + strm.total_out = 0; + return CODER_INIT_PASSTHRU; + } + + ret = LZMA_FORMAT_ERROR; + break; + + case FORMAT_XZ: +# ifdef MYTHREAD_ENABLED + mt_options.flags = flags; + + mt_options.threads = hardware_threads_get(); + mt_options.memlimit_stop + = hardware_memlimit_get(MODE_DECOMPRESS); + + // If single-threaded mode was requested, set the + // memlimit for threading to zero. This forces the + // decoder to use single-threaded mode which matches + // the behavior of lzma_stream_decoder(). + // + // Otherwise use the limit for threaded decompression + // which has a sane default (users are still free to + // make it insanely high though). + mt_options.memlimit_threading + = mt_options.threads == 1 + ? 0 : hardware_memlimit_mtdec_get(); + + ret = lzma_stream_decoder_mt(&strm, &mt_options); +# else + ret = lzma_stream_decoder(&strm, + hardware_memlimit_get( + MODE_DECOMPRESS), flags); +# endif + break; + + case FORMAT_LZMA: + ret = lzma_alone_decoder(&strm, + hardware_memlimit_get( + MODE_DECOMPRESS)); + break; + +# ifdef HAVE_LZIP_DECODER + case FORMAT_LZIP: + allow_trailing_input = true; + ret = lzma_lzip_decoder(&strm, + hardware_memlimit_get( + MODE_DECOMPRESS), flags); + break; +# endif + + case FORMAT_RAW: + // Memory usage has already been checked in + // coder_set_compression_settings(). + ret = lzma_raw_decoder(&strm, filters); + break; + } + + // Try to decode the headers. This will catch too low + // memory usage limit in case it happens in the first + // Block of the first Stream, which is where it very + // probably will happen if it is going to happen. + // + // This will also catch unsupported check type which + // we treat as a warning only. If there are empty + // concatenated Streams with unsupported check type then + // the message can be shown more than once here. The loop + // is used in case there is first a warning about + // unsupported check type and then the first Block + // would exceed the memlimit. + if (ret == LZMA_OK && init_format != FORMAT_RAW) { + strm.next_out = NULL; + strm.avail_out = 0; + while ((ret = lzma_code(&strm, LZMA_RUN)) + == LZMA_UNSUPPORTED_CHECK) + message_warning("%s: %s", pair->src_name, + message_strm(ret)); + + // With --single-stream lzma_code won't wait for + // LZMA_FINISH and thus it can return LZMA_STREAM_END + // if the file has no uncompressed data inside. + // So treat LZMA_STREAM_END as LZMA_OK here. + // When lzma_code() is called again in coder_normal() + // it will return LZMA_STREAM_END again. + if (ret == LZMA_STREAM_END) + ret = LZMA_OK; + } +#endif + } + + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, message_strm(ret)); + if (ret == LZMA_MEMLIMIT_ERROR) + message_mem_needed(V_ERROR, lzma_memusage(&strm)); + + return CODER_INIT_ERROR; + } + + return CODER_INIT_NORMAL; +} + + +/// Resolve conflicts between opt_block_size and opt_block_list in single +/// threaded mode. We want to default to opt_block_list, except when it is +/// larger than opt_block_size. If this is the case for the current Block +/// at *list_pos, then we break into smaller Blocks. Otherwise advance +/// to the next Block in opt_block_list, and break apart if needed. +static void +split_block(uint64_t *block_remaining, + uint64_t *next_block_remaining, + size_t *list_pos) +{ + if (*next_block_remaining > 0) { + // The Block at *list_pos has previously been split up. + assert(!hardware_threads_is_mt()); + assert(opt_block_size > 0); + assert(opt_block_list != NULL); + + if (*next_block_remaining > opt_block_size) { + // We have to split the current Block at *list_pos + // into another opt_block_size length Block. + *block_remaining = opt_block_size; + } else { + // This is the last remaining split Block for the + // Block at *list_pos. + *block_remaining = *next_block_remaining; + } + + *next_block_remaining -= *block_remaining; + + } else { + // The Block at *list_pos has been finished. Go to the next + // entry in the list. If the end of the list has been reached, + // reuse the size of the last Block. + if (opt_block_list[*list_pos + 1] != 0) + ++*list_pos; + + *block_remaining = opt_block_list[*list_pos]; + + // If in single-threaded mode, split up the Block if needed. + // This is not needed in multi-threaded mode because liblzma + // will do this due to how threaded encoding works. + if (!hardware_threads_is_mt() && opt_block_size > 0 + && *block_remaining > opt_block_size) { + *next_block_remaining + = *block_remaining - opt_block_size; + *block_remaining = opt_block_size; + } + } +} + + +static bool +coder_write_output(file_pair *pair) +{ + if (opt_mode != MODE_TEST) { + if (io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out)) + return true; + } + + strm.next_out = out_buf.u8; + strm.avail_out = IO_BUFFER_SIZE; + return false; +} + + +/// Compress or decompress using liblzma. +static bool +coder_normal(file_pair *pair) +{ + // Encoder needs to know when we have given all the input to it. + // The decoders need to know it too when we are using + // LZMA_CONCATENATED. We need to check for src_eof here, because + // the first input chunk has been already read if decompressing, + // and that may have been the only chunk we will read. + lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN; + + lzma_ret ret; + + // Assume that something goes wrong. + bool success = false; + + // block_remaining indicates how many input bytes to encode before + // finishing the current .xz Block. The Block size is set with + // --block-size=SIZE and --block-list. They have an effect only when + // compressing to the .xz format. If block_remaining == UINT64_MAX, + // only a single block is created. + uint64_t block_remaining = UINT64_MAX; + + // next_block_remaining for when we are in single-threaded mode and + // the Block in --block-list is larger than the --block-size=SIZE. + uint64_t next_block_remaining = 0; + + // Position in opt_block_list. Unused if --block-list wasn't used. + size_t list_pos = 0; + + // Handle --block-size for single-threaded mode and the first step + // of --block-list. + if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) { + // --block-size doesn't do anything here in threaded mode, + // because the threaded encoder will take care of splitting + // to fixed-sized Blocks. + if (!hardware_threads_is_mt() && opt_block_size > 0) + block_remaining = opt_block_size; + + // If --block-list was used, start with the first size. + // + // For threaded case, --block-size specifies how big Blocks + // the encoder needs to be prepared to create at maximum + // and --block-list will simultaneously cause new Blocks + // to be started at specified intervals. To keep things + // logical, the same is done in single-threaded mode. The + // output is still not identical because in single-threaded + // mode the size info isn't written into Block Headers. + if (opt_block_list != NULL) { + if (block_remaining < opt_block_list[list_pos]) { + assert(!hardware_threads_is_mt()); + next_block_remaining = opt_block_list[list_pos] + - block_remaining; + } else { + block_remaining = opt_block_list[list_pos]; + } + } + } + + strm.next_out = out_buf.u8; + strm.avail_out = IO_BUFFER_SIZE; + + while (!user_abort) { + // Fill the input buffer if it is empty and we aren't + // flushing or finishing. + if (strm.avail_in == 0 && action == LZMA_RUN) { + strm.next_in = in_buf.u8; + strm.avail_in = io_read(pair, &in_buf, + my_min(block_remaining, + IO_BUFFER_SIZE)); + + if (strm.avail_in == SIZE_MAX) + break; + + if (pair->src_eof) { + action = LZMA_FINISH; + + } else if (block_remaining != UINT64_MAX) { + // Start a new Block after every + // opt_block_size bytes of input. + block_remaining -= strm.avail_in; + if (block_remaining == 0) + action = LZMA_FULL_BARRIER; + } + + if (action == LZMA_RUN && pair->flush_needed) + action = LZMA_SYNC_FLUSH; + } + + // Let liblzma do the actual work. + ret = lzma_code(&strm, action); + + // Write out if the output buffer became full. + if (strm.avail_out == 0) { + if (coder_write_output(pair)) + break; + } + + if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH + || action == LZMA_FULL_BARRIER)) { + if (action == LZMA_SYNC_FLUSH) { + // Flushing completed. Write the pending data + // out immediately so that the reading side + // can decompress everything compressed so far. + if (coder_write_output(pair)) + break; + + // Mark that we haven't seen any new input + // since the previous flush. + pair->src_has_seen_input = false; + pair->flush_needed = false; + } else { + // Start a new Block after LZMA_FULL_BARRIER. + if (opt_block_list == NULL) { + assert(!hardware_threads_is_mt()); + assert(opt_block_size > 0); + block_remaining = opt_block_size; + } else { + split_block(&block_remaining, + &next_block_remaining, + &list_pos); + } + } + + // Start a new Block after LZMA_FULL_FLUSH or continue + // the same block after LZMA_SYNC_FLUSH. + action = LZMA_RUN; + + } else if (ret != LZMA_OK) { + // Determine if the return value indicates that we + // won't continue coding. LZMA_NO_CHECK would be + // here too if LZMA_TELL_ANY_CHECK was used. + const bool stop = ret != LZMA_UNSUPPORTED_CHECK; + + if (stop) { + // Write the remaining bytes even if something + // went wrong, because that way the user gets + // as much data as possible, which can be good + // when trying to get at least some useful + // data out of damaged files. + if (coder_write_output(pair)) + break; + } + + if (ret == LZMA_STREAM_END) { + if (allow_trailing_input) { + io_fix_src_pos(pair, strm.avail_in); + success = true; + break; + } + + // Check that there is no trailing garbage. + // This is needed for LZMA_Alone and raw + // streams. This is *not* done with .lz files + // as that format specifically requires + // allowing trailing garbage. + if (strm.avail_in == 0 && !pair->src_eof) { + // Try reading one more byte. + // Hopefully we don't get any more + // input, and thus pair->src_eof + // becomes true. + strm.avail_in = io_read( + pair, &in_buf, 1); + if (strm.avail_in == SIZE_MAX) + break; + + assert(strm.avail_in == 0 + || strm.avail_in == 1); + } + + if (strm.avail_in == 0) { + assert(pair->src_eof); + success = true; + break; + } + + // We hadn't reached the end of the file. + ret = LZMA_DATA_ERROR; + assert(stop); + } + + // If we get here and stop is true, something went + // wrong and we print an error. Otherwise it's just + // a warning and coding can continue. + if (stop) { + message_error("%s: %s", pair->src_name, + message_strm(ret)); + } else { + message_warning("%s: %s", pair->src_name, + message_strm(ret)); + + // When compressing, all possible errors set + // stop to true. + assert(opt_mode != MODE_COMPRESS); + } + + if (ret == LZMA_MEMLIMIT_ERROR) { + // Display how much memory it would have + // actually needed. + message_mem_needed(V_ERROR, + lzma_memusage(&strm)); + } + + if (stop) + break; + } + + // Show progress information under certain conditions. + message_progress_update(); + } + + return success; +} + + +/// Copy from input file to output file without processing the data in any +/// way. This is used only when trying to decompress unrecognized files +/// with --decompress --stdout --force, so the output is always stdout. +static bool +coder_passthru(file_pair *pair) +{ + while (strm.avail_in != 0) { + if (user_abort) + return false; + + if (io_write(pair, &in_buf, strm.avail_in)) + return false; + + strm.total_in += strm.avail_in; + strm.total_out = strm.total_in; + message_progress_update(); + + strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); + if (strm.avail_in == SIZE_MAX) + return false; + } + + return true; +} + + +extern void +coder_run(const char *filename) +{ + // Set and possibly print the filename for the progress message. + message_filename(filename); + + // Try to open the input file. + file_pair *pair = io_open_src(filename); + if (pair == NULL) + return; + + // Assume that something goes wrong. + bool success = false; + + if (opt_mode == MODE_COMPRESS) { + strm.next_in = NULL; + strm.avail_in = 0; + } else { + // Read the first chunk of input data. This is needed + // to detect the input file type. + strm.next_in = in_buf.u8; + strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); + } + + if (strm.avail_in != SIZE_MAX) { + // Initialize the coder. This will detect the file format + // and, in decompression or testing mode, check the memory + // usage of the first Block too. This way we don't try to + // open the destination file if we see that coding wouldn't + // work at all anyway. This also avoids deleting the old + // "target" file if --force was used. + const enum coder_init_ret init_ret = coder_init(pair); + + if (init_ret != CODER_INIT_ERROR && !user_abort) { + // Don't open the destination file when --test + // is used. + if (opt_mode == MODE_TEST || !io_open_dest(pair)) { + // Remember the current time. It is needed + // for progress indicator. + mytime_set_start_time(); + + // Initialize the progress indicator. + // + // NOTE: When reading from stdin, fstat() + // isn't called on it and thus src_st.st_size + // is zero. If stdin pointed to a regular + // file, it would still be possible to know + // the file size but then we would also need + // to take into account the current reading + // position since with stdin it isn't + // necessarily at the beginning of the file. + const bool is_passthru = init_ret + == CODER_INIT_PASSTHRU; + const uint64_t in_size + = pair->src_st.st_size <= 0 + ? 0 : (uint64_t)(pair->src_st.st_size); + message_progress_start(&strm, + is_passthru, in_size); + + // Do the actual coding or passthru. + if (is_passthru) + success = coder_passthru(pair); + else + success = coder_normal(pair); + + message_progress_end(success); + } + } + } + + // Close the file pair. It needs to know if coding was successful to + // know if the source or target file should be unlinked. + io_close(pair, success); + + return; +} + + +#ifndef NDEBUG +extern void +coder_free(void) +{ + lzma_end(&strm); + return; +} +#endif diff --git a/src/xz/coder.h b/src/xz/coder.h new file mode 100644 index 0000000..9e69e57 --- /dev/null +++ b/src/xz/coder.h @@ -0,0 +1,78 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file coder.h +/// \brief Compresses or uncompresses a file +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +enum operation_mode { + MODE_COMPRESS, + MODE_DECOMPRESS, + MODE_TEST, + MODE_LIST, +}; + + +// NOTE: The order of these is significant in suffix.c. +enum format_type { + FORMAT_AUTO, + FORMAT_XZ, + FORMAT_LZMA, +#ifdef HAVE_LZIP_DECODER + FORMAT_LZIP, +#endif + FORMAT_RAW, +}; + + +/// Operation mode of the command line tool. This is set in args.c and read +/// in several files. +extern enum operation_mode opt_mode; + +/// File format to use when encoding or what format(s) to accept when +/// decoding. This is a global because it's needed also in suffix.c. +/// This is set in args.c. +extern enum format_type opt_format; + +/// If true, the compression settings are automatically adjusted down if +/// they exceed the memory usage limit. +extern bool opt_auto_adjust; + +/// If true, stop after decoding the first stream. +extern bool opt_single_stream; + +/// If non-zero, start a new .xz Block after every opt_block_size bytes +/// of input. This has an effect only when compressing to the .xz format. +extern uint64_t opt_block_size; + +/// This is non-NULL if --block-list was used. This contains the Block sizes +/// as an array that is terminated with 0. +extern uint64_t *opt_block_list; + +/// Set the integrity check type used when compressing +extern void coder_set_check(lzma_check check); + +/// Set preset number +extern void coder_set_preset(uint32_t new_preset); + +/// Enable extreme mode +extern void coder_set_extreme(void); + +/// Add a filter to the custom filter chain +extern void coder_add_filter(lzma_vli id, void *options); + +/// +extern void coder_set_compression_settings(void); + +/// Compress or decompress the given file +extern void coder_run(const char *filename); + +#ifndef NDEBUG +/// Free the memory allocated for the coder and kill the worker threads. +extern void coder_free(void); +#endif diff --git a/src/xz/file_io.c b/src/xz/file_io.c new file mode 100644 index 0000000..a5841b3 --- /dev/null +++ b/src/xz/file_io.c @@ -0,0 +1,1366 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file file_io.c +/// \brief File opening, unlinking, and closing +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include <fcntl.h> + +#ifdef TUKLIB_DOSLIKE +# include <io.h> +#else +# include <poll.h> +static bool warn_fchown; +#endif + +#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) +# include <sys/time.h> +#elif defined(HAVE__FUTIME) +# include <sys/utime.h> +#elif defined(HAVE_UTIME) +# include <utime.h> +#endif + +#ifdef HAVE_CAPSICUM +# ifdef HAVE_SYS_CAPSICUM_H +# include <sys/capsicum.h> +# else +# include <sys/capability.h> +# endif +#endif + +#include "tuklib_open_stdxxx.h" + +#ifndef O_BINARY +# define O_BINARY 0 +#endif + +#ifndef O_NOCTTY +# define O_NOCTTY 0 +#endif + +// Using this macro to silence a warning from gcc -Wlogical-op. +#if EAGAIN == EWOULDBLOCK +# define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN) +#else +# define IS_EAGAIN_OR_EWOULDBLOCK(e) \ + ((e) == EAGAIN || (e) == EWOULDBLOCK) +#endif + + +typedef enum { + IO_WAIT_MORE, // Reading or writing is possible. + IO_WAIT_ERROR, // Error or user_abort + IO_WAIT_TIMEOUT, // poll() timed out +} io_wait_ret; + + +/// If true, try to create sparse files when decompressing. +static bool try_sparse = true; + +#ifdef ENABLE_SANDBOX +/// True if the conditions for sandboxing (described in main()) have been met. +static bool sandbox_allowed = false; +#endif + +#ifndef TUKLIB_DOSLIKE +/// File status flags of standard input. This is used by io_open_src() +/// and io_close_src(). +static int stdin_flags; +static bool restore_stdin_flags = false; + +/// Original file status flags of standard output. This is used by +/// io_open_dest() and io_close_dest() to save and restore the flags. +static int stdout_flags; +static bool restore_stdout_flags = false; + +/// Self-pipe used together with the user_abort variable to avoid +/// race conditions with signal handling. +static int user_abort_pipe[2]; +#endif + + +static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); + + +extern void +io_init(void) +{ + // Make sure that stdin, stdout, and stderr are connected to + // a valid file descriptor. Exit immediately with exit code ERROR + // if we cannot make the file descriptors valid. Maybe we should + // print an error message, but our stderr could be screwed anyway. + tuklib_open_stdxxx(E_ERROR); + +#ifndef TUKLIB_DOSLIKE + // If fchown() fails setting the owner, we warn about it only if + // we are root. + warn_fchown = geteuid() == 0; + + // Create a pipe for the self-pipe trick. + if (pipe(user_abort_pipe)) + message_fatal(_("Error creating a pipe: %s"), + strerror(errno)); + + // Make both ends of the pipe non-blocking. + for (unsigned i = 0; i < 2; ++i) { + int flags = fcntl(user_abort_pipe[i], F_GETFL); + if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL, + flags | O_NONBLOCK) == -1) + message_fatal(_("Error creating a pipe: %s"), + strerror(errno)); + } +#endif + +#ifdef __DJGPP__ + // Avoid doing useless things when statting files. + // This isn't important but doesn't hurt. + _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; +#endif + + return; +} + + +#ifndef TUKLIB_DOSLIKE +extern void +io_write_to_user_abort_pipe(void) +{ + // If the write() fails, it's probably due to the pipe being full. + // Failing in that case is fine. If the reason is something else, + // there's not much we can do since this is called in a signal + // handler. So ignore the errors and try to avoid warnings with + // GCC and glibc when _FORTIFY_SOURCE=2 is used. + uint8_t b = '\0'; + const int ret = write(user_abort_pipe[1], &b, 1); + (void)ret; + return; +} +#endif + + +extern void +io_no_sparse(void) +{ + try_sparse = false; + return; +} + + +#ifdef ENABLE_SANDBOX +extern void +io_allow_sandbox(void) +{ + sandbox_allowed = true; + return; +} + + +/// Enables operating-system-specific sandbox if it is possible. +/// src_fd is the file descriptor of the input file. +static void +io_sandbox_enter(int src_fd) +{ + if (!sandbox_allowed) { + // This message is more often annoying than useful so + // it's commented out. It can be useful when developing + // the sandboxing code. + //message(V_DEBUG, _("Sandbox is disabled due " + // "to incompatible command line arguments")); + return; + } + + const char dummy_str[] = "x"; + + // Try to ensure that both libc and xz locale files have been + // loaded when NLS is enabled. + snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL)); + + // Try to ensure that iconv data files needed for handling multibyte + // characters have been loaded. This is needed at least with glibc. + tuklib_mbstr_width(dummy_str, NULL); + +#ifdef HAVE_CAPSICUM + // Capsicum needs FreeBSD 10.0 or later. + cap_rights_t rights; + + if (cap_rights_limit(src_fd, cap_rights_init(&rights, + CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK))) + goto error; + + if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights, + CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP, + CAP_WRITE, CAP_SEEK))) + goto error; + + if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights, + CAP_EVENT))) + goto error; + + if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights, + CAP_WRITE))) + goto error; + + if (cap_enter()) + goto error; + +#elif defined(HAVE_PLEDGE) + // pledge() was introduced in OpenBSD 5.9. + // + // main() unconditionally calls pledge() with fairly relaxed + // promises which work in all situations. Here we make the + // sandbox more strict. + if (pledge("stdio", "")) + goto error; + + (void)src_fd; + +#else +# error ENABLE_SANDBOX is defined but no sandboxing method was found. +#endif + + // This message is annoying in xz -lvv. + //message(V_DEBUG, _("Sandbox was successfully enabled")); + return; + +error: + message_fatal(_("Failed to enable the sandbox")); +} +#endif // ENABLE_SANDBOX + + +#ifndef TUKLIB_DOSLIKE +/// \brief Waits for input or output to become available or for a signal +/// +/// This uses the self-pipe trick to avoid a race condition that can occur +/// if a signal is caught after user_abort has been checked but before e.g. +/// read() has been called. In that situation read() could block unless +/// non-blocking I/O is used. With non-blocking I/O something like select() +/// or poll() is needed to avoid a busy-wait loop, and the same race condition +/// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in +/// POSIX) but neither is portable enough in 2013. The self-pipe trick is +/// old and very portable. +static io_wait_ret +io_wait(file_pair *pair, int timeout, bool is_reading) +{ + struct pollfd pfd[2]; + + if (is_reading) { + pfd[0].fd = pair->src_fd; + pfd[0].events = POLLIN; + } else { + pfd[0].fd = pair->dest_fd; + pfd[0].events = POLLOUT; + } + + pfd[1].fd = user_abort_pipe[0]; + pfd[1].events = POLLIN; + + while (true) { + const int ret = poll(pfd, 2, timeout); + + if (user_abort) + return IO_WAIT_ERROR; + + if (ret == -1) { + if (errno == EINTR || errno == EAGAIN) + continue; + + message_error(_("%s: poll() failed: %s"), + is_reading ? pair->src_name + : pair->dest_name, + strerror(errno)); + return IO_WAIT_ERROR; + } + + if (ret == 0) + return IO_WAIT_TIMEOUT; + + if (pfd[0].revents != 0) + return IO_WAIT_MORE; + } +} +#endif + + +/// \brief Unlink a file +/// +/// This tries to verify that the file being unlinked really is the file that +/// we want to unlink by verifying device and inode numbers. There's still +/// a small unavoidable race, but this is much better than nothing (the file +/// could have been moved/replaced even hours earlier). +static void +io_unlink(const char *name, const struct stat *known_st) +{ +#if defined(TUKLIB_DOSLIKE) + // On DOS-like systems, st_ino is meaningless, so don't bother + // testing it. Just silence a compiler warning. + (void)known_st; +#else + struct stat new_st; + + // If --force was used, use stat() instead of lstat(). This way + // (de)compressing symlinks works correctly. However, it also means + // that xz cannot detect if a regular file foo is renamed to bar + // and then a symlink foo -> bar is created. Because of stat() + // instead of lstat(), xz will think that foo hasn't been replaced + // with another file. Thus, xz will remove foo even though it no + // longer is the same file that xz used when it started compressing. + // Probably it's not too bad though, so this doesn't need a more + // complex fix. + const int stat_ret = opt_force + ? stat(name, &new_st) : lstat(name, &new_st); + + if (stat_ret +# ifdef __VMS + // st_ino is an array, and we don't want to + // compare st_dev at all. + || memcmp(&new_st.st_ino, &known_st->st_ino, + sizeof(new_st.st_ino)) != 0 +# else + // Typical POSIX-like system + || new_st.st_dev != known_st->st_dev + || new_st.st_ino != known_st->st_ino +# endif + ) + // TRANSLATORS: When compression or decompression finishes, + // and xz is going to remove the source file, xz first checks + // if the source file still exists, and if it does, does its + // device and inode numbers match what xz saw when it opened + // the source file. If these checks fail, this message is + // shown, %s being the filename, and the file is not deleted. + // The check for device and inode numbers is there, because + // it is possible that the user has put a new file in place + // of the original file, and in that case it obviously + // shouldn't be removed. + message_warning(_("%s: File seems to have been moved, " + "not removing"), name); + else +#endif + // There's a race condition between lstat() and unlink() + // but at least we have tried to avoid removing wrong file. + if (unlink(name)) + message_warning(_("%s: Cannot remove: %s"), + name, strerror(errno)); + + return; +} + + +/// \brief Copies owner/group and permissions +/// +/// \todo ACL and EA support +/// +static void +io_copy_attrs(const file_pair *pair) +{ + // Skip chown and chmod on Windows. +#ifndef TUKLIB_DOSLIKE + // This function is more tricky than you may think at first. + // Blindly copying permissions may permit users to access the + // destination file who didn't have permission to access the + // source file. + + // Try changing the owner of the file. If we aren't root or the owner + // isn't already us, fchown() probably doesn't succeed. We warn + // about failing fchown() only if we are root. + if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1)) + && warn_fchown) + message_warning(_("%s: Cannot set the file owner: %s"), + pair->dest_name, strerror(errno)); + + mode_t mode; + + // With BSD semantics the new dest file may have a group that + // does not belong to the user. If the src file has the same gid + // nothing has to be done. Nevertheless OpenBSD fchown(2) fails + // in this case which seems to be POSIX compliant. As there is + // nothing to do, skip the system call. + if (pair->dest_st.st_gid != pair->src_st.st_gid + && fchown(pair->dest_fd, (uid_t)(-1), + pair->src_st.st_gid)) { + message_warning(_("%s: Cannot set the file group: %s"), + pair->dest_name, strerror(errno)); + // We can still safely copy some additional permissions: + // `group' must be at least as strict as `other' and + // also vice versa. + // + // NOTE: After this, the owner of the source file may + // get additional permissions. This shouldn't be too bad, + // because the owner would have had permission to chmod + // the original file anyway. + mode = ((pair->src_st.st_mode & 0070) >> 3) + & (pair->src_st.st_mode & 0007); + mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; + } else { + // Drop the setuid, setgid, and sticky bits. + mode = pair->src_st.st_mode & 0777; + } + + if (fchmod(pair->dest_fd, mode)) + message_warning(_("%s: Cannot set the file permissions: %s"), + pair->dest_name, strerror(errno)); +#endif + + // Copy the timestamps. We have several possible ways to do this, of + // which some are better in both security and precision. + // + // First, get the nanosecond part of the timestamps. As of writing, + // it's not standardized by POSIX, and there are several names for + // the same thing in struct stat. + long atime_nsec; + long mtime_nsec; + +# if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) + // GNU and Solaris + atime_nsec = pair->src_st.st_atim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.tv_nsec; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) + // BSD + atime_nsec = pair->src_st.st_atimespec.tv_nsec; + mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) + // GNU and BSD without extensions + atime_nsec = pair->src_st.st_atimensec; + mtime_nsec = pair->src_st.st_mtimensec; + +# elif defined(HAVE_STRUCT_STAT_ST_UATIME) + // Tru64 + atime_nsec = pair->src_st.st_uatime * 1000; + mtime_nsec = pair->src_st.st_umtime * 1000; + +# elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) + // UnixWare + atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; + mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; + +# else + // Safe fallback + atime_nsec = 0; + mtime_nsec = 0; +# endif + + // Construct a structure to hold the timestamps and call appropriate + // function to set the timestamps. +#if defined(HAVE_FUTIMENS) + // Use nanosecond precision. + struct timespec tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_nsec = atime_nsec; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_nsec = mtime_nsec; + + (void)futimens(pair->dest_fd, tv); + +#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) + // Use microsecond precision. + struct timeval tv[2]; + tv[0].tv_sec = pair->src_st.st_atime; + tv[0].tv_usec = atime_nsec / 1000; + tv[1].tv_sec = pair->src_st.st_mtime; + tv[1].tv_usec = mtime_nsec / 1000; + +# if defined(HAVE_FUTIMES) + (void)futimes(pair->dest_fd, tv); +# elif defined(HAVE_FUTIMESAT) + (void)futimesat(pair->dest_fd, NULL, tv); +# else + // Argh, no function to use a file descriptor to set the timestamp. + (void)utimes(pair->dest_name, tv); +# endif + +#elif defined(HAVE__FUTIME) + // Use one-second precision with Windows-specific _futime(). + // We could use utime() too except that for some reason the + // timestamp will get reset at close(). With _futime() it works. + // This struct cannot be const as _futime() takes a non-const pointer. + struct _utimbuf buf = { + .actime = pair->src_st.st_atime, + .modtime = pair->src_st.st_mtime, + }; + + // Avoid warnings. + (void)atime_nsec; + (void)mtime_nsec; + + (void)_futime(pair->dest_fd, &buf); + +#elif defined(HAVE_UTIME) + // Use one-second precision. utime() doesn't support using file + // descriptor either. Some systems have broken utime() prototype + // so don't make this const. + struct utimbuf buf = { + .actime = pair->src_st.st_atime, + .modtime = pair->src_st.st_mtime, + }; + + // Avoid warnings. + (void)atime_nsec; + (void)mtime_nsec; + + (void)utime(pair->dest_name, &buf); +#endif + + return; +} + + +/// Opens the source file. Returns false on success, true on error. +static bool +io_open_src_real(file_pair *pair) +{ + // There's nothing to open when reading from stdin. + if (pair->src_name == stdin_filename) { + pair->src_fd = STDIN_FILENO; +#ifdef TUKLIB_DOSLIKE + setmode(STDIN_FILENO, O_BINARY); +#else + // Try to set stdin to non-blocking mode. It won't work + // e.g. on OpenBSD if stdout is e.g. /dev/null. In such + // case we proceed as if stdin were non-blocking anyway + // (in case of /dev/null it will be in practice). The + // same applies to stdout in io_open_dest_real(). + stdin_flags = fcntl(STDIN_FILENO, F_GETFL); + if (stdin_flags == -1) { + message_error(_("Error getting the file status flags " + "from standard input: %s"), + strerror(errno)); + return true; + } + + if ((stdin_flags & O_NONBLOCK) == 0 + && fcntl(STDIN_FILENO, F_SETFL, + stdin_flags | O_NONBLOCK) != -1) + restore_stdin_flags = true; +#endif +#ifdef HAVE_POSIX_FADVISE + // It will fail if stdin is a pipe and that's fine. + (void)posix_fadvise(STDIN_FILENO, 0, 0, + opt_mode == MODE_LIST + ? POSIX_FADV_RANDOM + : POSIX_FADV_SEQUENTIAL); +#endif + return false; + } + + // Symlinks are not followed unless writing to stdout or --force + // or --keep was used. + const bool follow_symlinks + = opt_stdout || opt_force || opt_keep_original; + + // We accept only regular files if we are writing the output + // to disk too. bzip2 allows overriding this with --force but + // gzip and xz don't. + const bool reg_files_only = !opt_stdout; + + // Flags for open() + int flags = O_RDONLY | O_BINARY | O_NOCTTY; + +#ifndef TUKLIB_DOSLIKE + // Use non-blocking I/O: + // - It prevents blocking when opening FIFOs and some other + // special files, which is good if we want to accept only + // regular files. + // - It can help avoiding some race conditions with signal handling. + flags |= O_NONBLOCK; +#endif + +#if defined(O_NOFOLLOW) + if (!follow_symlinks) + flags |= O_NOFOLLOW; +#elif !defined(TUKLIB_DOSLIKE) + // Some POSIX-like systems lack O_NOFOLLOW (it's not required + // by POSIX). Check for symlinks with a separate lstat() on + // these systems. + if (!follow_symlinks) { + struct stat st; + if (lstat(pair->src_name, &st)) { + message_error("%s: %s", pair->src_name, + strerror(errno)); + return true; + + } else if (S_ISLNK(st.st_mode)) { + message_warning(_("%s: Is a symbolic link, " + "skipping"), pair->src_name); + return true; + } + } +#else + // Avoid warnings. + (void)follow_symlinks; +#endif + + // Try to open the file. Signals have been blocked so EINTR shouldn't + // be possible. + pair->src_fd = open(pair->src_name, flags); + + if (pair->src_fd == -1) { + // Signals (that have a signal handler) have been blocked. + assert(errno != EINTR); + +#ifdef O_NOFOLLOW + // Give an understandable error message if the reason + // for failing was that the file was a symbolic link. + // + // Note that at least Linux, OpenBSD, Solaris, and Darwin + // use ELOOP to indicate that O_NOFOLLOW was the reason + // that open() failed. Because there may be + // directories in the pathname, ELOOP may occur also + // because of a symlink loop in the directory part. + // So ELOOP doesn't tell us what actually went wrong, + // and this stupidity went into POSIX-1.2008 too. + // + // FreeBSD associates EMLINK with O_NOFOLLOW and + // Tru64 uses ENOTSUP. We use these directly here + // and skip the lstat() call and the associated race. + // I want to hear if there are other kernels that + // fail with something else than ELOOP with O_NOFOLLOW. + bool was_symlink = false; + +# if defined(__FreeBSD__) || defined(__DragonFly__) + if (errno == EMLINK) + was_symlink = true; + +# elif defined(__digital__) && defined(__unix__) + if (errno == ENOTSUP) + was_symlink = true; + +# elif defined(__NetBSD__) + if (errno == EFTYPE) + was_symlink = true; + +# else + if (errno == ELOOP && !follow_symlinks) { + const int saved_errno = errno; + struct stat st; + if (lstat(pair->src_name, &st) == 0 + && S_ISLNK(st.st_mode)) + was_symlink = true; + + errno = saved_errno; + } +# endif + + if (was_symlink) + message_warning(_("%s: Is a symbolic link, " + "skipping"), pair->src_name); + else +#endif + // Something else than O_NOFOLLOW failing + // (assuming that the race conditions didn't + // confuse us). + message_error("%s: %s", pair->src_name, + strerror(errno)); + + return true; + } + + // Stat the source file. We need the result also when we copy + // the permissions, and when unlinking. + // + // NOTE: Use stat() instead of fstat() with DJGPP, because + // then we have a better chance to get st_ino value that can + // be used in io_open_dest_real() to prevent overwriting the + // source file. +#ifdef __DJGPP__ + if (stat(pair->src_name, &pair->src_st)) + goto error_msg; +#else + if (fstat(pair->src_fd, &pair->src_st)) + goto error_msg; +#endif + + if (S_ISDIR(pair->src_st.st_mode)) { + message_warning(_("%s: Is a directory, skipping"), + pair->src_name); + goto error; + } + + if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { + message_warning(_("%s: Not a regular file, skipping"), + pair->src_name); + goto error; + } + +#ifndef TUKLIB_DOSLIKE + if (reg_files_only && !opt_force && !opt_keep_original) { + if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { + // gzip rejects setuid and setgid files even + // when --force was used. bzip2 doesn't check + // for them, but calls fchown() after fchmod(), + // and many systems automatically drop setuid + // and setgid bits there. + // + // We accept setuid and setgid files if + // --force or --keep was used. We drop these bits + // explicitly in io_copy_attr(). + message_warning(_("%s: File has setuid or " + "setgid bit set, skipping"), + pair->src_name); + goto error; + } + + if (pair->src_st.st_mode & S_ISVTX) { + message_warning(_("%s: File has sticky bit " + "set, skipping"), + pair->src_name); + goto error; + } + + if (pair->src_st.st_nlink > 1) { + message_warning(_("%s: Input file has more " + "than one hard link, " + "skipping"), pair->src_name); + goto error; + } + } + + // If it is something else than a regular file, wait until + // there is input available. This way reading from FIFOs + // will work when open() is used with O_NONBLOCK. + if (!S_ISREG(pair->src_st.st_mode)) { + signals_unblock(); + const io_wait_ret ret = io_wait(pair, -1, true); + signals_block(); + + if (ret != IO_WAIT_MORE) + goto error; + } +#endif + +#ifdef HAVE_POSIX_FADVISE + // It will fail with some special files like FIFOs but that is fine. + (void)posix_fadvise(pair->src_fd, 0, 0, + opt_mode == MODE_LIST + ? POSIX_FADV_RANDOM + : POSIX_FADV_SEQUENTIAL); +#endif + + return false; + +error_msg: + message_error("%s: %s", pair->src_name, strerror(errno)); +error: + (void)close(pair->src_fd); + return true; +} + + +extern file_pair * +io_open_src(const char *src_name) +{ + if (src_name[0] == '\0') { + message_error(_("Empty filename, skipping")); + return NULL; + } + + // Since we have only one file open at a time, we can use + // a statically allocated structure. + static file_pair pair; + + // This implicitly also initializes src_st.st_size to zero + // which is expected to be <= 0 by default. fstat() isn't + // called when reading from standard input but src_st.st_size + // is still read. + pair = (file_pair){ + .src_name = src_name, + .dest_name = NULL, + .src_fd = -1, + .dest_fd = -1, + .src_eof = false, + .src_has_seen_input = false, + .flush_needed = false, + .dest_try_sparse = false, + .dest_pending_sparse = 0, + }; + + // Block the signals, for which we have a custom signal handler, so + // that we don't need to worry about EINTR. + signals_block(); + const bool error = io_open_src_real(&pair); + signals_unblock(); + +#ifdef ENABLE_SANDBOX + if (!error) + io_sandbox_enter(pair.src_fd); +#endif + + return error ? NULL : &pair; +} + + +/// \brief Closes source file of the file_pair structure +/// +/// \param pair File whose src_fd should be closed +/// \param success If true, the file will be removed from the disk if +/// closing succeeds and --keep hasn't been used. +static void +io_close_src(file_pair *pair, bool success) +{ +#ifndef TUKLIB_DOSLIKE + if (restore_stdin_flags) { + assert(pair->src_fd == STDIN_FILENO); + + restore_stdin_flags = false; + + if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1) + message_error(_("Error restoring the status flags " + "to standard input: %s"), + strerror(errno)); + } +#endif + + if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { + // Close the file before possibly unlinking it. On DOS-like + // systems this is always required since unlinking will fail + // if the file is open. On POSIX systems it usually works + // to unlink open files, but in some cases it doesn't and + // one gets EBUSY in errno. + // + // xz 5.2.2 and older unlinked the file before closing it + // (except on DOS-like systems). The old code didn't handle + // EBUSY and could fail e.g. on some CIFS shares. The + // advantage of unlinking before closing is negligible + // (avoids a race between close() and stat()/lstat() and + // unlink()), so let's keep this simple. + (void)close(pair->src_fd); + + if (success && !opt_keep_original) + io_unlink(pair->src_name, &pair->src_st); + } + + return; +} + + +static bool +io_open_dest_real(file_pair *pair) +{ + if (opt_stdout || pair->src_fd == STDIN_FILENO) { + // We don't modify or free() this. + pair->dest_name = (char *)"(stdout)"; + pair->dest_fd = STDOUT_FILENO; +#ifdef TUKLIB_DOSLIKE + setmode(STDOUT_FILENO, O_BINARY); +#else + // Try to set O_NONBLOCK if it isn't already set. + // If it fails, we assume that stdout is non-blocking + // in practice. See the comments in io_open_src_real() + // for similar situation with stdin. + // + // NOTE: O_APPEND may be unset later in this function + // and it relies on stdout_flags being set here. + stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); + if (stdout_flags == -1) { + message_error(_("Error getting the file status flags " + "from standard output: %s"), + strerror(errno)); + return true; + } + + if ((stdout_flags & O_NONBLOCK) == 0 + && fcntl(STDOUT_FILENO, F_SETFL, + stdout_flags | O_NONBLOCK) != -1) + restore_stdout_flags = true; +#endif + } else { + pair->dest_name = suffix_get_dest_name(pair->src_name); + if (pair->dest_name == NULL) + return true; + +#ifdef __DJGPP__ + struct stat st; + if (stat(pair->dest_name, &st) == 0) { + // Check that it isn't a special file like "prn". + if (st.st_dev == -1) { + message_error("%s: Refusing to write to " + "a DOS special file", + pair->dest_name); + free(pair->dest_name); + return true; + } + + // Check that we aren't overwriting the source file. + if (st.st_dev == pair->src_st.st_dev + && st.st_ino == pair->src_st.st_ino) { + message_error("%s: Output file is the same " + "as the input file", + pair->dest_name); + free(pair->dest_name); + return true; + } + } +#endif + + // If --force was used, unlink the target file first. + if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { + message_error(_("%s: Cannot remove: %s"), + pair->dest_name, strerror(errno)); + free(pair->dest_name); + return true; + } + + // Open the file. + int flags = O_WRONLY | O_BINARY | O_NOCTTY + | O_CREAT | O_EXCL; +#ifndef TUKLIB_DOSLIKE + flags |= O_NONBLOCK; +#endif + const mode_t mode = S_IRUSR | S_IWUSR; + pair->dest_fd = open(pair->dest_name, flags, mode); + + if (pair->dest_fd == -1) { + message_error("%s: %s", pair->dest_name, + strerror(errno)); + free(pair->dest_name); + return true; + } + } + +#ifndef TUKLIB_DOSLIKE + // dest_st isn't used on DOS-like systems except as a dummy + // argument to io_unlink(), so don't fstat() on such systems. + if (fstat(pair->dest_fd, &pair->dest_st)) { + // If fstat() really fails, we have a safe fallback here. +# if defined(__VMS) + pair->dest_st.st_ino[0] = 0; + pair->dest_st.st_ino[1] = 0; + pair->dest_st.st_ino[2] = 0; +# else + pair->dest_st.st_dev = 0; + pair->dest_st.st_ino = 0; +# endif + } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { + // When writing to standard output, we need to be extra + // careful: + // - It may be connected to something else than + // a regular file. + // - We aren't necessarily writing to a new empty file + // or to the end of an existing file. + // - O_APPEND may be active. + // + // TODO: I'm keeping this disabled for DOS-like systems + // for now. FAT doesn't support sparse files, but NTFS + // does, so maybe this should be enabled on Windows after + // some testing. + if (pair->dest_fd == STDOUT_FILENO) { + if (!S_ISREG(pair->dest_st.st_mode)) + return false; + + if (stdout_flags & O_APPEND) { + // Creating a sparse file is not possible + // when O_APPEND is active (it's used by + // shell's >> redirection). As I understand + // it, it is safe to temporarily disable + // O_APPEND in xz, because if someone + // happened to write to the same file at the + // same time, results would be bad anyway + // (users shouldn't assume that xz uses any + // specific block size when writing data). + // + // The write position may be something else + // than the end of the file, so we must fix + // it to start writing at the end of the file + // to imitate O_APPEND. + if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) + return false; + + // Construct the new file status flags. + // If O_NONBLOCK was set earlier in this + // function, it must be kept here too. + int flags = stdout_flags & ~O_APPEND; + if (restore_stdout_flags) + flags |= O_NONBLOCK; + + // If this fcntl() fails, we continue but won't + // try to create sparse output. The original + // flags will still be restored if needed (to + // unset O_NONBLOCK) when the file is finished. + if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1) + return false; + + // Disabling O_APPEND succeeded. Mark + // that the flags should be restored + // in io_close_dest(). (This may have already + // been set when enabling O_NONBLOCK.) + restore_stdout_flags = true; + + } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) + != pair->dest_st.st_size) { + // Writing won't start exactly at the end + // of the file. We cannot use sparse output, + // because it would probably corrupt the file. + return false; + } + } + + pair->dest_try_sparse = true; + } +#endif + + return false; +} + + +extern bool +io_open_dest(file_pair *pair) +{ + signals_block(); + const bool ret = io_open_dest_real(pair); + signals_unblock(); + return ret; +} + + +/// \brief Closes destination file of the file_pair structure +/// +/// \param pair File whose dest_fd should be closed +/// \param success If false, the file will be removed from the disk. +/// +/// \return Zero if closing succeeds. On error, -1 is returned and +/// error message printed. +static bool +io_close_dest(file_pair *pair, bool success) +{ +#ifndef TUKLIB_DOSLIKE + // If io_open_dest() has disabled O_APPEND, restore it here. + if (restore_stdout_flags) { + assert(pair->dest_fd == STDOUT_FILENO); + + restore_stdout_flags = false; + + if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) { + message_error(_("Error restoring the O_APPEND flag " + "to standard output: %s"), + strerror(errno)); + return true; + } + } +#endif + + if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) + return false; + + if (close(pair->dest_fd)) { + message_error(_("%s: Closing the file failed: %s"), + pair->dest_name, strerror(errno)); + + // Closing destination file failed, so we cannot trust its + // contents. Get rid of junk: + io_unlink(pair->dest_name, &pair->dest_st); + free(pair->dest_name); + return true; + } + + // If the operation using this file wasn't successful, we git rid + // of the junk file. + if (!success) + io_unlink(pair->dest_name, &pair->dest_st); + + free(pair->dest_name); + + return false; +} + + +extern void +io_close(file_pair *pair, bool success) +{ + // Take care of sparseness at the end of the output file. + if (success && pair->dest_try_sparse + && pair->dest_pending_sparse > 0) { + // Seek forward one byte less than the size of the pending + // hole, then write one zero-byte. This way the file grows + // to its correct size. An alternative would be to use + // ftruncate() but that isn't portable enough (e.g. it + // doesn't work with FAT on Linux; FAT isn't that important + // since it doesn't support sparse files anyway, but we don't + // want to create corrupt files on it). + if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, + SEEK_CUR) == -1) { + message_error(_("%s: Seeking failed when trying " + "to create a sparse file: %s"), + pair->dest_name, strerror(errno)); + success = false; + } else { + const uint8_t zero[1] = { '\0' }; + if (io_write_buf(pair, zero, 1)) + success = false; + } + } + + signals_block(); + + // Copy the file attributes. We need to skip this if destination + // file isn't open or it is standard output. + if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) + io_copy_attrs(pair); + + // Close the destination first. If it fails, we must not remove + // the source file! + if (io_close_dest(pair, success)) + success = false; + + // Close the source file, and unlink it if the operation using this + // file pair was successful and we haven't requested to keep the + // source file. + io_close_src(pair, success); + + signals_unblock(); + + return; +} + + +extern void +io_fix_src_pos(file_pair *pair, size_t rewind_size) +{ + assert(rewind_size <= IO_BUFFER_SIZE); + + if (rewind_size > 0) { + // This doesn't need to work on unseekable file descriptors, + // so just ignore possible errors. + (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR); + } + + return; +} + + +extern size_t +io_read(file_pair *pair, io_buf *buf, size_t size) +{ + // We use small buffers here. + assert(size < SSIZE_MAX); + + size_t pos = 0; + + while (pos < size) { + const ssize_t amount = read( + pair->src_fd, buf->u8 + pos, size - pos); + + if (amount == 0) { + pair->src_eof = true; + break; + } + + if (amount == -1) { + if (errno == EINTR) { + if (user_abort) + return SIZE_MAX; + + continue; + } + +#ifndef TUKLIB_DOSLIKE + if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) { + // Disable the flush-timeout if no input has + // been seen since the previous flush and thus + // there would be nothing to flush after the + // timeout expires (avoids busy waiting). + const int timeout = pair->src_has_seen_input + ? mytime_get_flush_timeout() + : -1; + + switch (io_wait(pair, timeout, true)) { + case IO_WAIT_MORE: + continue; + + case IO_WAIT_ERROR: + return SIZE_MAX; + + case IO_WAIT_TIMEOUT: + pair->flush_needed = true; + return pos; + + default: + message_bug(); + } + } +#endif + + message_error(_("%s: Read error: %s"), + pair->src_name, strerror(errno)); + + return SIZE_MAX; + } + + pos += (size_t)(amount); + + if (!pair->src_has_seen_input) { + pair->src_has_seen_input = true; + mytime_set_flush_time(); + } + } + + return pos; +} + + +extern bool +io_seek_src(file_pair *pair, uint64_t pos) +{ + // Caller must not attempt to seek past the end of the input file + // (seeking to 100 in a 100-byte file is seeking to the end of + // the file, not past the end of the file, and thus that is allowed). + // + // This also validates that pos can be safely cast to off_t. + if (pos > (uint64_t)(pair->src_st.st_size)) + message_bug(); + + if (lseek(pair->src_fd, (off_t)(pos), SEEK_SET) == -1) { + message_error(_("%s: Error seeking the file: %s"), + pair->src_name, strerror(errno)); + return true; + } + + pair->src_eof = false; + + return false; +} + + +extern bool +io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos) +{ + // Using lseek() and read() is more portable than pread() and + // for us it is as good as real pread(). + if (io_seek_src(pair, pos)) + return true; + + const size_t amount = io_read(pair, buf, size); + if (amount == SIZE_MAX) + return true; + + if (amount != size) { + message_error(_("%s: Unexpected end of file"), + pair->src_name); + return true; + } + + return false; +} + + +static bool +is_sparse(const io_buf *buf) +{ + assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); + + for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) + if (buf->u64[i] != 0) + return false; + + return true; +} + + +static bool +io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) +{ + assert(size < SSIZE_MAX); + + while (size > 0) { + const ssize_t amount = write(pair->dest_fd, buf, size); + if (amount == -1) { + if (errno == EINTR) { + if (user_abort) + return true; + + continue; + } + +#ifndef TUKLIB_DOSLIKE + if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) { + if (io_wait(pair, -1, false) == IO_WAIT_MORE) + continue; + + return true; + } +#endif + + // Handle broken pipe specially. gzip and bzip2 + // don't print anything on SIGPIPE. In addition, + // gzip --quiet uses exit status 2 (warning) on + // broken pipe instead of whatever raise(SIGPIPE) + // would make it return. It is there to hide "Broken + // pipe" message on some old shells (probably old + // GNU bash). + // + // We don't do anything special with --quiet, which + // is what bzip2 does too. If we get SIGPIPE, we + // will handle it like other signals by setting + // user_abort, and get EPIPE here. + if (errno != EPIPE) + message_error(_("%s: Write error: %s"), + pair->dest_name, strerror(errno)); + + return true; + } + + buf += (size_t)(amount); + size -= (size_t)(amount); + } + + return false; +} + + +extern bool +io_write(file_pair *pair, const io_buf *buf, size_t size) +{ + assert(size <= IO_BUFFER_SIZE); + + if (pair->dest_try_sparse) { + // Check if the block is sparse (contains only zeros). If it + // sparse, we just store the amount and return. We will take + // care of actually skipping over the hole when we hit the + // next data block or close the file. + // + // Since io_close() requires that dest_pending_sparse > 0 + // if the file ends with sparse block, we must also return + // if size == 0 to avoid doing the lseek(). + if (size == IO_BUFFER_SIZE) { + // Even if the block was sparse, treat it as non-sparse + // if the pending sparse amount is large compared to + // the size of off_t. In practice this only matters + // on 32-bit systems where off_t isn't always 64 bits. + const off_t pending_max + = (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2); + if (is_sparse(buf) && pair->dest_pending_sparse + < pending_max) { + pair->dest_pending_sparse += (off_t)(size); + return false; + } + } else if (size == 0) { + return false; + } + + // This is not a sparse block. If we have a pending hole, + // skip it now. + if (pair->dest_pending_sparse > 0) { + if (lseek(pair->dest_fd, pair->dest_pending_sparse, + SEEK_CUR) == -1) { + message_error(_("%s: Seeking failed when " + "trying to create a sparse " + "file: %s"), pair->dest_name, + strerror(errno)); + return true; + } + + pair->dest_pending_sparse = 0; + } + } + + return io_write_buf(pair, buf->u8, size); +} diff --git a/src/xz/file_io.h b/src/xz/file_io.h new file mode 100644 index 0000000..8a9e336 --- /dev/null +++ b/src/xz/file_io.h @@ -0,0 +1,179 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file file_io.h +/// \brief I/O types and functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them. +// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t)) +#if BUFSIZ <= 1024 +# define IO_BUFFER_SIZE 8192 +#else +# define IO_BUFFER_SIZE (BUFSIZ & ~7U) +#endif + + +/// is_sparse() accesses the buffer as uint64_t for maximum speed. +/// The u32 and u64 members must only be access through this union +/// to avoid strict aliasing violations. Taking a pointer of u8 +/// should be fine as long as uint8_t maps to unsigned char which +/// can alias anything. +typedef union { + uint8_t u8[IO_BUFFER_SIZE]; + uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)]; + uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)]; +} io_buf; + + +typedef struct { + /// Name of the source filename (as given on the command line) or + /// pointer to static "(stdin)" when reading from standard input. + const char *src_name; + + /// Destination filename converted from src_name or pointer to static + /// "(stdout)" when writing to standard output. + char *dest_name; + + /// File descriptor of the source file + int src_fd; + + /// File descriptor of the target file + int dest_fd; + + /// True once end of the source file has been detected. + bool src_eof; + + /// For --flush-timeout: True if at least one byte has been read + /// since the previous flush or the start of the file. + bool src_has_seen_input; + + /// For --flush-timeout: True when flushing is needed. + bool flush_needed; + + /// If true, we look for long chunks of zeros and try to create + /// a sparse file. + bool dest_try_sparse; + + /// This is used only if dest_try_sparse is true. This holds the + /// number of zero bytes we haven't written out, because we plan + /// to make that byte range a sparse chunk. + off_t dest_pending_sparse; + + /// Stat of the source file. + struct stat src_st; + + /// Stat of the destination file. + struct stat dest_st; + +} file_pair; + + +/// \brief Initialize the I/O module +extern void io_init(void); + + +#ifndef TUKLIB_DOSLIKE +/// \brief Write a byte to user_abort_pipe[1] +/// +/// This is called from a signal handler. +extern void io_write_to_user_abort_pipe(void); +#endif + + +/// \brief Disable creation of sparse files when decompressing +extern void io_no_sparse(void); + + +#ifdef ENABLE_SANDBOX +/// \brief main() calls this if conditions for sandboxing have been met. +extern void io_allow_sandbox(void); +#endif + + +/// \brief Open the source file +extern file_pair *io_open_src(const char *src_name); + + +/// \brief Open the destination file +extern bool io_open_dest(file_pair *pair); + + +/// \brief Closes the file descriptors and frees possible allocated memory +/// +/// The success argument determines if source or destination file gets +/// unlinked: +/// - false: The destination file is unlinked. +/// - true: The source file is unlinked unless writing to stdout or --keep +/// was used. +extern void io_close(file_pair *pair, bool success); + + +/// \brief Reads from the source file to a buffer +/// +/// \param pair File pair having the source file open for reading +/// \param buf Destination buffer to hold the read data +/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX +/// +/// \return On success, number of bytes read is returned. On end of +/// file zero is returned and pair->src_eof set to true. +/// On error, SIZE_MAX is returned and error message printed. +extern size_t io_read(file_pair *pair, io_buf *buf, size_t size); + + +/// \brief Fix the position in src_fd +/// +/// This is used when --single-thream has been specified and decompression +/// is successful. If the input file descriptor supports seeking, this +/// function fixes the input position to point to the next byte after the +/// decompressed stream. +/// +/// \param pair File pair having the source file open for reading +/// \param rewind_size How many bytes of extra have been read i.e. +/// how much to seek backwards. +extern void io_fix_src_pos(file_pair *pair, size_t rewind_size); + + +/// \brief Seek to the given absolute position in the source file +/// +/// This calls lseek() and also clears pair->src_eof. +/// +/// \param pair Seekable source file +/// \param pos Offset relative to the beginning of the file, +/// from which the data should be read. +/// +/// \return On success, false is returned. On error, error message +/// is printed and true is returned. +extern bool io_seek_src(file_pair *pair, uint64_t pos); + + +/// \brief Read from source file from given offset to a buffer +/// +/// This is remotely similar to standard pread(). This uses lseek() though, +/// so the read offset is changed on each call. +/// +/// \param pair Seekable source file +/// \param buf Destination buffer +/// \param size Amount of data to read +/// \param pos Offset relative to the beginning of the file, +/// from which the data should be read. +/// +/// \return On success, false is returned. On error, error message +/// is printed and true is returned. +extern bool io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos); + + +/// \brief Writes a buffer to the destination file +/// +/// \param pair File pair having the destination file open for writing +/// \param buf Buffer containing the data to be written +/// \param size Size of the buffer; assumed be smaller than SSIZE_MAX +/// +/// \return On success, zero is returned. On error, -1 is returned +/// and error message printed. +extern bool io_write(file_pair *pair, const io_buf *buf, size_t size); diff --git a/src/xz/hardware.c b/src/xz/hardware.c new file mode 100644 index 0000000..ccdc3b9 --- /dev/null +++ b/src/xz/hardware.c @@ -0,0 +1,338 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware.c +/// \brief Detection of available hardware resources +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/// Maximum number of worker threads. This can be set with +/// the --threads=NUM command line option. +static uint32_t threads_max = 1; + +/// True when the number of threads is automatically determined based +/// on the available hardware threads. +static bool threads_are_automatic = false; + +/// If true, then try to use multi-threaded mode (if memlimit allows) +/// even if only one thread was requested explicitly (-T+1). +static bool use_mt_mode_with_one_thread = false; + +/// Memory usage limit for compression +static uint64_t memlimit_compress = 0; + +/// Memory usage limit for decompression +static uint64_t memlimit_decompress = 0; + +/// Default memory usage for multithreaded modes: +/// +/// - Default value for --memlimit-compress when automatic number of threads +/// is used. However, if the limit wouldn't allow even one thread then +/// the limit is ignored in coder.c and one thread will be used anyway. +/// This mess is a compromise: we wish to prevent -T0 from using too +/// many threads but we also don't want xz to give an error due to +/// a memlimit that the user didn't explicitly set. +/// +/// - Default value for --memlimit-mt-decompress +/// +/// This value is caluclated in hardware_init() and cannot be changed later. +static uint64_t memlimit_mt_default; + +/// Memory usage limit for multithreaded decompression. This is a soft limit: +/// if reducing the number of threads to one isn't enough to keep memory +/// usage below this limit, then one thread is used and this limit is ignored. +/// memlimit_decompress is still obeyed. +/// +/// This can be set with --memlimit-mt-decompress. The default value for +/// this is memlimit_mt_default. +static uint64_t memlimit_mtdec; + +/// Total amount of physical RAM +static uint64_t total_ram; + + +extern void +hardware_threads_set(uint32_t n) +{ + // Reset these to false first and set them to true when appropriate. + threads_are_automatic = false; + use_mt_mode_with_one_thread = false; + + if (n == 0) { + // Automatic number of threads was requested. + // If there is only one hardware thread, multi-threaded + // mode will still be used if memory limit allows. + threads_are_automatic = true; + use_mt_mode_with_one_thread = true; + + // If threading support was enabled at build time, + // use the number of available CPU cores. Otherwise + // use one thread since disabling threading support + // omits lzma_cputhreads() from liblzma. +#ifdef MYTHREAD_ENABLED + threads_max = lzma_cputhreads(); + if (threads_max == 0) + threads_max = 1; +#else + threads_max = 1; +#endif + } else if (n == UINT32_MAX) { + use_mt_mode_with_one_thread = true; + threads_max = 1; + } else { + threads_max = n; + } + + return; +} + + +extern uint32_t +hardware_threads_get(void) +{ + return threads_max; +} + + +extern bool +hardware_threads_is_mt(void) +{ +#ifdef MYTHREAD_ENABLED + return threads_max > 1 || use_mt_mode_with_one_thread; +#else + return false; +#endif +} + + +extern void +hardware_memlimit_set(uint64_t new_memlimit, + bool set_compress, bool set_decompress, bool set_mtdec, + bool is_percentage) +{ + if (is_percentage) { + assert(new_memlimit > 0); + assert(new_memlimit <= 100); + new_memlimit = (uint32_t)new_memlimit * total_ram / 100; + } + + if (set_compress) { + memlimit_compress = new_memlimit; + +#if SIZE_MAX == UINT32_MAX + // FIXME? + // + // When running a 32-bit xz on a system with a lot of RAM and + // using a percentage-based memory limit, the result can be + // bigger than the 32-bit address space. Limiting the limit + // below SIZE_MAX for compression (not decompression) makes + // xz lower the compression settings (or number of threads) + // to a level that *might* work. In practice it has worked + // when using a 64-bit kernel that gives full 4 GiB address + // space to 32-bit programs. In other situations this might + // still be too high, like 32-bit kernels that may give much + // less than 4 GiB to a single application. + // + // So this is an ugly hack but I will keep it here while + // it does more good than bad. + // + // Use a value less than SIZE_MAX so that there's some room + // for the xz program and so on. Don't use 4000 MiB because + // it could look like someone mixed up base-2 and base-10. +#ifdef __mips__ + // For MIPS32, due to architectural pecularities, + // the limit is even lower. + const uint64_t limit_max = UINT64_C(2000) << 20; +#else + const uint64_t limit_max = UINT64_C(4020) << 20; +#endif + + // UINT64_MAX is a special case for the string "max" so + // that has to be handled specially. + if (memlimit_compress != UINT64_MAX + && memlimit_compress > limit_max) + memlimit_compress = limit_max; +#endif + } + + if (set_decompress) + memlimit_decompress = new_memlimit; + + if (set_mtdec) + memlimit_mtdec = new_memlimit; + + return; +} + + +extern uint64_t +hardware_memlimit_get(enum operation_mode mode) +{ + // 0 is a special value that indicates the default. + // It disables the limit in single-threaded mode. + // + // NOTE: For multithreaded decompression, this is the hard limit + // (memlimit_stop). hardware_memlimit_mtdec_get() gives the + // soft limit (memlimit_threaded). + const uint64_t memlimit = mode == MODE_COMPRESS + ? memlimit_compress : memlimit_decompress; + return memlimit != 0 ? memlimit : UINT64_MAX; +} + + +extern uint64_t +hardware_memlimit_mtenc_get(void) +{ + return hardware_memlimit_mtenc_is_default() + ? memlimit_mt_default + : hardware_memlimit_get(MODE_COMPRESS); +} + + +extern bool +hardware_memlimit_mtenc_is_default(void) +{ + return memlimit_compress == 0 && threads_are_automatic; +} + + +extern uint64_t +hardware_memlimit_mtdec_get(void) +{ + uint64_t m = memlimit_mtdec != 0 + ? memlimit_mtdec + : memlimit_mt_default; + + // Cap the value to memlimit_decompress if it has been specified. + // This is nice for --info-memory. It wouldn't be needed for liblzma + // since it does this anyway. + if (memlimit_decompress != 0 && m > memlimit_decompress) + m = memlimit_decompress; + + return m; +} + + +/// Helper for hardware_memlimit_show() to print one human-readable info line. +static void +memlimit_show(const char *str, size_t str_columns, uint64_t value) +{ + // Calculate the field width so that str will be padded to take + // str_columns on the terminal. + // + // NOTE: If the string is invalid, this will be -1. Using -1 as + // the field width is fine here so it's not handled specially. + const int fw = tuklib_mbstr_fw(str, (int)(str_columns)); + + // The memory usage limit is considered to be disabled if value + // is 0 or UINT64_MAX. This might get a bit more complex once there + // is threading support. See the comment in hardware_memlimit_get(). + if (value == 0 || value == UINT64_MAX) + printf(" %-*s %s\n", fw, str, _("Disabled")); + else + printf(" %-*s %s MiB (%s B)\n", fw, str, + uint64_to_str(round_up_to_mib(value), 0), + uint64_to_str(value, 1)); + + return; +} + + +extern void +hardware_memlimit_show(void) +{ + uint32_t cputhreads = 1; +#ifdef MYTHREAD_ENABLED + cputhreads = lzma_cputhreads(); + if (cputhreads == 0) + cputhreads = 1; +#endif + + if (opt_robot) { + printf("%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu32 "\n", + total_ram, + memlimit_compress, + memlimit_decompress, + hardware_memlimit_mtdec_get(), + memlimit_mt_default, + cputhreads); + } else { + const char *msgs[] = { + _("Amount of physical memory (RAM):"), + _("Number of processor threads:"), + _("Compression:"), + _("Decompression:"), + _("Multi-threaded decompression:"), + _("Default for -T0:"), + }; + + size_t width_max = 1; + for (unsigned i = 0; i < ARRAY_SIZE(msgs); ++i) { + size_t w = tuklib_mbstr_width(msgs[i], NULL); + + // When debugging, catch invalid strings with + // an assertion. Otherwise fallback to 1 so + // that the columns just won't be aligned. + assert(w != (size_t)-1); + if (w == (size_t)-1) + w = 1; + + if (width_max < w) + width_max = w; + } + + puts(_("Hardware information:")); + memlimit_show(msgs[0], width_max, total_ram); + printf(" %-*s %" PRIu32 "\n", + tuklib_mbstr_fw(msgs[1], (int)(width_max)), + msgs[1], cputhreads); + + putchar('\n'); + puts(_("Memory usage limits:")); + memlimit_show(msgs[2], width_max, memlimit_compress); + memlimit_show(msgs[3], width_max, memlimit_decompress); + memlimit_show(msgs[4], width_max, + hardware_memlimit_mtdec_get()); + memlimit_show(msgs[5], width_max, memlimit_mt_default); + } + + tuklib_exit(E_SUCCESS, E_ERROR, message_verbosity_get() != V_SILENT); +} + + +extern void +hardware_init(void) +{ + // Get the amount of RAM. If we cannot determine it, + // use the assumption defined by the configure script. + total_ram = lzma_physmem(); + if (total_ram == 0) + total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024; + + // FIXME? There may be better methods to determine the default value. + // One Linux-specific suggestion is to use MemAvailable from + // /proc/meminfo as the starting point. + memlimit_mt_default = total_ram / 4; + +#if SIZE_MAX == UINT32_MAX + // A too high value may cause 32-bit xz to run out of address space. + // Use a conservative maximum value here. A few typical address space + // sizes with Linux: + // - x86-64 with 32-bit xz: 4 GiB + // - x86: 3 GiB + // - MIPS32: 2 GiB + const size_t mem_ceiling = 1400U << 20; + if (memlimit_mt_default > mem_ceiling) + memlimit_mt_default = mem_ceiling; +#endif + + return; +} diff --git a/src/xz/hardware.h b/src/xz/hardware.h new file mode 100644 index 0000000..2bb3d7b --- /dev/null +++ b/src/xz/hardware.h @@ -0,0 +1,74 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file hardware.h +/// \brief Detection of available hardware resources +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// Initialize some hardware-specific variables, which are needed by other +/// hardware_* functions. +extern void hardware_init(void); + + +/// Set the maximum number of worker threads. +/// A special value of UINT32_MAX sets one thread in multi-threaded mode. +extern void hardware_threads_set(uint32_t threadlimit); + +/// Get the maximum number of worker threads. +extern uint32_t hardware_threads_get(void); + +/// Returns true if multithreaded mode should be used for .xz compression. +/// This can be true even if the number of threads is one. +extern bool hardware_threads_is_mt(void); + + +/// Set the memory usage limit. There are separate limits for compression, +/// decompression (also includes --list), and multithreaded decompression. +/// Any combination of these can be set with a single call to this function. +/// Zero indicates resetting the limit back to the defaults. +/// The limit can also be set as a percentage of installed RAM; the +/// percentage must be in the range [1, 100]. +extern void hardware_memlimit_set(uint64_t new_memlimit, + bool set_compress, bool set_decompress, bool set_mtdec, + bool is_percentage); + +/// Get the current memory usage limit for compression or decompression. +/// This is a hard limit that will not be exceeded. This is obeyed in +/// both single-threaded and multithreaded modes. +extern uint64_t hardware_memlimit_get(enum operation_mode mode); + +/// This returns a system-specific default value if all of the following +/// conditions are true: +/// +/// - An automatic number of threads was requested (--threads=0). +/// +/// - --memlimit-compress wasn't used or it was reset to the default +/// value by setting it to 0. +/// +/// Otherwise this is identical to hardware_memlimit_get(MODE_COMPRESS). +/// +/// The idea is to keep automatic thread count reasonable so that too +/// high memory usage is avoided and, with 32-bit xz, running out of +/// address space is avoided. +extern uint64_t hardware_memlimit_mtenc_get(void); + +/// Returns true if the value returned by hardware_memlimit_mtenc_get() is +/// a system-specific default value. coder.c uses this to ignore the default +/// memlimit in case it's too small even for a single thread in multithreaded +/// mode. This way the default limit will never make xz fail or affect the +/// compressed output; it will only make xz reduce the number of threads. +extern bool hardware_memlimit_mtenc_is_default(void); + +/// Get the current memory usage limit for multithreaded decompression. +/// This is only used to reduce the number of threads. This limit can be +/// exceeded if the number of threads are reduce to one. Then the value +/// from hardware_memlimit_get() will be honored like in single-threaded mode. +extern uint64_t hardware_memlimit_mtdec_get(void); + +/// Display the amount of RAM and memory usage limits and exit. +extern void hardware_memlimit_show(void) lzma_attribute((__noreturn__)); diff --git a/src/xz/list.c b/src/xz/list.c new file mode 100644 index 0000000..ee61aeb --- /dev/null +++ b/src/xz/list.c @@ -0,0 +1,1317 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.c +/// \brief Listing information about .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include "tuklib_integer.h" + + +/// Information about a .xz file +typedef struct { + /// Combined Index of all Streams in the file + lzma_index *idx; + + /// Total amount of Stream Padding + uint64_t stream_padding; + + /// Highest memory usage so far + uint64_t memusage_max; + + /// True if all Blocks so far have Compressed Size and + /// Uncompressed Size fields + bool all_have_sizes; + + /// Oldest XZ Utils version that will decompress the file + uint32_t min_version; + +} xz_file_info; + +#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 } + + +/// Information about a .xz Block +typedef struct { + /// Size of the Block Header + uint32_t header_size; + + /// A few of the Block Flags as a string + char flags[3]; + + /// Size of the Compressed Data field in the Block + lzma_vli compressed_size; + + /// Decoder memory usage for this Block + uint64_t memusage; + + /// The filter chain of this Block in human-readable form + char *filter_chain; + +} block_header_info; + +#define BLOCK_HEADER_INFO_INIT { .filter_chain = NULL } +#define block_header_info_end(bhi) free((bhi)->filter_chain) + + +/// Strings ending in a colon. These are used for lines like +/// " Foo: 123 MiB". These are grouped because translated strings +/// may have different maximum string length, and we want to pad all +/// strings so that the values are aligned nicely. +static const char *colon_strs[] = { + N_("Streams:"), + N_("Blocks:"), + N_("Compressed size:"), + N_("Uncompressed size:"), + N_("Ratio:"), + N_("Check:"), + N_("Stream Padding:"), + N_("Memory needed:"), + N_("Sizes in headers:"), + // This won't be aligned because it's so long: + //N_("Minimum XZ Utils version:"), + N_("Number of files:"), +}; + +/// Enum matching the above strings. +enum { + COLON_STR_STREAMS, + COLON_STR_BLOCKS, + COLON_STR_COMPRESSED_SIZE, + COLON_STR_UNCOMPRESSED_SIZE, + COLON_STR_RATIO, + COLON_STR_CHECK, + COLON_STR_STREAM_PADDING, + COLON_STR_MEMORY_NEEDED, + COLON_STR_SIZES_IN_HEADERS, + //COLON_STR_MINIMUM_XZ_VERSION, + COLON_STR_NUMBER_OF_FILES, +}; + +/// Field widths to use with printf to pad the strings to use the same number +/// of columns on a terminal. +static int colon_strs_fw[ARRAY_SIZE(colon_strs)]; + +/// Convenience macro to get the translated string and its field width +/// using a COLON_STR_foo enum. +#define COLON_STR(num) colon_strs_fw[num], _(colon_strs[num]) + + +/// Column headings +static struct { + /// Table column heading string + const char *str; + + /// Number of terminal-columns to use for this table-column. + /// If a translated string is longer than the initial value, + /// this value will be increased in init_headings(). + int columns; + + /// Field width to use for printf() to pad "str" to use "columns" + /// number of columns on a terminal. This is calculated in + /// init_headings(). + int fw; + +} headings[] = { + { N_("Stream"), 6, 0 }, + { N_("Block"), 9, 0 }, + { N_("Blocks"), 9, 0 }, + { N_("CompOffset"), 15, 0 }, + { N_("UncompOffset"), 15, 0 }, + { N_("CompSize"), 15, 0 }, + { N_("UncompSize"), 15, 0 }, + { N_("TotalSize"), 15, 0 }, + { N_("Ratio"), 5, 0 }, + { N_("Check"), 10, 0 }, + { N_("CheckVal"), 1, 0 }, + { N_("Padding"), 7, 0 }, + { N_("Header"), 5, 0 }, + { N_("Flags"), 2, 0 }, + { N_("MemUsage"), 7 + 4, 0 }, // +4 is for " MiB" + { N_("Filters"), 1, 0 }, +}; + +/// Enum matching the above strings. +enum { + HEADING_STREAM, + HEADING_BLOCK, + HEADING_BLOCKS, + HEADING_COMPOFFSET, + HEADING_UNCOMPOFFSET, + HEADING_COMPSIZE, + HEADING_UNCOMPSIZE, + HEADING_TOTALSIZE, + HEADING_RATIO, + HEADING_CHECK, + HEADING_CHECKVAL, + HEADING_PADDING, + HEADING_HEADERSIZE, + HEADING_HEADERFLAGS, + HEADING_MEMUSAGE, + HEADING_FILTERS, +}; + +#define HEADING_STR(num) headings[num].fw, _(headings[num].str) + + +/// Check ID to string mapping +static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = { + // TRANSLATORS: Indicates that there is no integrity check. + // This string is used in tables. In older xz version this + // string was limited to ten columns in a fixed-width font, but + // nowadays there is no strict length restriction anymore. + N_("None"), + "CRC32", + // TRANSLATORS: Indicates that integrity check name is not known, + // but the Check ID is known (here 2). In older xz version these + // strings were limited to ten columns in a fixed-width font, but + // nowadays there is no strict length restriction anymore. + N_("Unknown-2"), + N_("Unknown-3"), + "CRC64", + N_("Unknown-5"), + N_("Unknown-6"), + N_("Unknown-7"), + N_("Unknown-8"), + N_("Unknown-9"), + "SHA-256", + N_("Unknown-11"), + N_("Unknown-12"), + N_("Unknown-13"), + N_("Unknown-14"), + N_("Unknown-15"), +}; + +/// Buffer size for get_check_names(). This may be a bit ridiculous, +/// but at least it's enough if some language needs many multibyte chars. +#define CHECKS_STR_SIZE 1024 + + +/// Value of the Check field as hexadecimal string. +/// This is set by parse_check_value(). +static char check_value[2 * LZMA_CHECK_SIZE_MAX + 1]; + + +/// Totals that are displayed if there was more than one file. +/// The "files" counter is also used in print_info_adv() to show +/// the file number. +static struct { + uint64_t files; + uint64_t streams; + uint64_t blocks; + uint64_t compressed_size; + uint64_t uncompressed_size; + uint64_t stream_padding; + uint64_t memusage_max; + uint32_t checks; + uint32_t min_version; + bool all_have_sizes; +} totals = { 0, 0, 0, 0, 0, 0, 0, 0, 50000002, true }; + + +/// Initialize colon_strs_fw[]. +static void +init_colon_strs(void) +{ + // Lengths of translated strings as bytes. + size_t lens[ARRAY_SIZE(colon_strs)]; + + // Lengths of translated strings as columns. + size_t widths[ARRAY_SIZE(colon_strs)]; + + // Maximum number of columns needed by a translated string. + size_t width_max = 0; + + for (unsigned i = 0; i < ARRAY_SIZE(colon_strs); ++i) { + widths[i] = tuklib_mbstr_width(_(colon_strs[i]), &lens[i]); + + // If debugging is enabled, catch invalid strings with + // an assertion. However, when not debugging, use the + // byte count as the fallback width. This shouldn't + // ever happen unless there is a bad string in the + // translations, but in such case I guess it's better + // to try to print something useful instead of failing + // completely. + assert(widths[i] != (size_t)-1); + if (widths[i] == (size_t)-1) + widths[i] = lens[i]; + + if (widths[i] > width_max) + width_max = widths[i]; + } + + // Calculate the field width for printf("%*s") so that the strings + // will use width_max columns on a terminal. + for (unsigned i = 0; i < ARRAY_SIZE(colon_strs); ++i) + colon_strs_fw[i] = (int)(lens[i] + width_max - widths[i]); + + return; +} + + +/// Initialize headings[]. +static void +init_headings(void) +{ + // Before going through the heading strings themselves, treat + // the Check heading specially: Look at the widths of the various + // check names and increase the width of the Check column if needed. + // The width of the heading name "Check" will then be handled normally + // with other heading names in the second loop in this function. + for (unsigned i = 0; i < ARRAY_SIZE(check_names); ++i) { + size_t len; + size_t w = tuklib_mbstr_width(_(check_names[i]), &len); + + // Error handling like in init_colon_strs(). + assert(w != (size_t)-1); + if (w == (size_t)-1) + w = len; + + // If the translated string is wider than the minimum width + // set at compile time, increase the width. + if ((size_t)(headings[HEADING_CHECK].columns) < w) + headings[HEADING_CHECK].columns = w; + } + + for (unsigned i = 0; i < ARRAY_SIZE(headings); ++i) { + size_t len; + size_t w = tuklib_mbstr_width(_(headings[i].str), &len); + + // Error handling like in init_colon_strs(). + assert(w != (size_t)-1); + if (w == (size_t)-1) + w = len; + + // If the translated string is wider than the minimum width + // set at compile time, increase the width. + if ((size_t)(headings[i].columns) < w) + headings[i].columns = w; + + // Calculate the field width for printf("%*s") so that + // the string uses .columns number of columns on a terminal. + headings[i].fw = (int)(len + (size_t)headings[i].columns - w); + } + + return; +} + + +/// Initialize the printf field widths that are needed to get nicely aligned +/// output with translated strings. +static void +init_field_widths(void) +{ + init_colon_strs(); + init_headings(); + return; +} + + +/// Convert XZ Utils version number to a string. +static const char * +xz_ver_to_str(uint32_t ver) +{ + static char buf[32]; + + unsigned int major = ver / 10000000U; + ver -= major * 10000000U; + + unsigned int minor = ver / 10000U; + ver -= minor * 10000U; + + unsigned int patch = ver / 10U; + ver -= patch * 10U; + + const char *stability = ver == 0 ? "alpha" : ver == 1 ? "beta" : ""; + + snprintf(buf, sizeof(buf), "%u.%u.%u%s", + major, minor, patch, stability); + return buf; +} + + +/// \brief Parse the Index(es) from the given .xz file +/// +/// \param xfi Pointer to structure where the decoded information +/// is stored. +/// \param pair Input file +/// +/// \return On success, false is returned. On error, true is returned. +/// +static bool +parse_indexes(xz_file_info *xfi, file_pair *pair) +{ + if (pair->src_st.st_size <= 0) { + message_error(_("%s: File is empty"), pair->src_name); + return true; + } + + if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) { + message_error(_("%s: Too small to be a valid .xz file"), + pair->src_name); + return true; + } + + io_buf buf; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_index *idx = NULL; + + lzma_ret ret = lzma_file_info_decoder(&strm, &idx, + hardware_memlimit_get(MODE_LIST), + (uint64_t)(pair->src_st.st_size)); + if (ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, message_strm(ret)); + return true; + } + + while (true) { + if (strm.avail_in == 0) { + strm.next_in = buf.u8; + strm.avail_in = io_read(pair, &buf, IO_BUFFER_SIZE); + if (strm.avail_in == SIZE_MAX) + goto error; + } + + ret = lzma_code(&strm, LZMA_RUN); + + switch (ret) { + case LZMA_OK: + break; + + case LZMA_SEEK_NEEDED: + // liblzma won't ask us to seek past the known size + // of the input file. + assert(strm.seek_pos + <= (uint64_t)(pair->src_st.st_size)); + if (io_seek_src(pair, strm.seek_pos)) + goto error; + + // avail_in must be zero so that we will read new + // input. + strm.avail_in = 0; + break; + + case LZMA_STREAM_END: { + lzma_end(&strm); + xfi->idx = idx; + + // Calculate xfi->stream_padding. + lzma_index_iter iter; + lzma_index_iter_init(&iter, xfi->idx); + while (!lzma_index_iter_next(&iter, + LZMA_INDEX_ITER_STREAM)) + xfi->stream_padding += iter.stream.padding; + + return false; + } + + default: + message_error("%s: %s", pair->src_name, + message_strm(ret)); + + // If the error was too low memory usage limit, + // show also how much memory would have been needed. + if (ret == LZMA_MEMLIMIT_ERROR) + message_mem_needed(V_ERROR, + lzma_memusage(&strm)); + + goto error; + } + } + +error: + lzma_end(&strm); + return true; +} + + +/// \brief Parse the Block Header +/// +/// The result is stored into *bhi. The caller takes care of initializing it. +/// +/// \return False on success, true on error. +static bool +parse_block_header(file_pair *pair, const lzma_index_iter *iter, + block_header_info *bhi, xz_file_info *xfi) +{ +#if IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX +# error IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX +#endif + + // Get the whole Block Header with one read, but don't read past + // the end of the Block (or even its Check field). + const uint32_t size = my_min(iter->block.total_size + - lzma_check_size(iter->stream.flags->check), + LZMA_BLOCK_HEADER_SIZE_MAX); + io_buf buf; + if (io_pread(pair, &buf, size, iter->block.compressed_file_offset)) + return true; + + // Zero would mean Index Indicator and thus not a valid Block. + if (buf.u8[0] == 0) + goto data_error; + + // Initialize the block structure and decode Block Header Size. + lzma_filter filters[LZMA_FILTERS_MAX + 1]; + lzma_block block; + block.version = 0; + block.check = iter->stream.flags->check; + block.filters = filters; + + block.header_size = lzma_block_header_size_decode(buf.u8[0]); + if (block.header_size > size) + goto data_error; + + // Decode the Block Header. + switch (lzma_block_header_decode(&block, NULL, buf.u8)) { + case LZMA_OK: + break; + + case LZMA_OPTIONS_ERROR: + message_error("%s: %s", pair->src_name, + message_strm(LZMA_OPTIONS_ERROR)); + return true; + + case LZMA_DATA_ERROR: + goto data_error; + + default: + message_bug(); + } + + // Check the Block Flags. These must be done before calling + // lzma_block_compressed_size(), because it overwrites + // block.compressed_size. + // + // NOTE: If you add new characters here, update the minimum number of + // columns in headings[HEADING_HEADERFLAGS] to match the number of + // characters used here. + bhi->flags[0] = block.compressed_size != LZMA_VLI_UNKNOWN + ? 'c' : '-'; + bhi->flags[1] = block.uncompressed_size != LZMA_VLI_UNKNOWN + ? 'u' : '-'; + bhi->flags[2] = '\0'; + + // Collect information if all Blocks have both Compressed Size + // and Uncompressed Size fields. They can be useful e.g. for + // multi-threaded decompression so it can be useful to know it. + xfi->all_have_sizes &= block.compressed_size != LZMA_VLI_UNKNOWN + && block.uncompressed_size != LZMA_VLI_UNKNOWN; + + // Validate or set block.compressed_size. + switch (lzma_block_compressed_size(&block, + iter->block.unpadded_size)) { + case LZMA_OK: + // Validate also block.uncompressed_size if it is present. + // If it isn't present, there's no need to set it since + // we aren't going to actually decompress the Block; if + // we were decompressing, then we should set it so that + // the Block decoder could validate the Uncompressed Size + // that was stored in the Index. + if (block.uncompressed_size == LZMA_VLI_UNKNOWN + || block.uncompressed_size + == iter->block.uncompressed_size) + break; + + // If the above fails, the file is corrupt so + // LZMA_DATA_ERROR is a good error code. + + // Fall through + + case LZMA_DATA_ERROR: + // Free the memory allocated by lzma_block_header_decode(). + lzma_filters_free(filters, NULL); + goto data_error; + + default: + message_bug(); + } + + // Copy the known sizes. + bhi->header_size = block.header_size; + bhi->compressed_size = block.compressed_size; + + // Calculate the decoder memory usage and update the maximum + // memory usage of this Block. + bhi->memusage = lzma_raw_decoder_memusage(filters); + if (xfi->memusage_max < bhi->memusage) + xfi->memusage_max = bhi->memusage; + + // Determine the minimum XZ Utils version that supports this Block. + // + // - ARM64 filter needs 5.4.0. + // + // - 5.0.0 doesn't support empty LZMA2 streams and thus empty + // Blocks that use LZMA2. This decoder bug was fixed in 5.0.2. + if (xfi->min_version < 50040002U) { + for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) { + if (filters[i].id == LZMA_FILTER_ARM64) { + xfi->min_version = 50040002U; + break; + } + } + } + + if (xfi->min_version < 50000022U) { + size_t i = 0; + while (filters[i + 1].id != LZMA_VLI_UNKNOWN) + ++i; + + if (filters[i].id == LZMA_FILTER_LZMA2 + && iter->block.uncompressed_size == 0) + xfi->min_version = 50000022U; + } + + // Convert the filter chain to human readable form. + const lzma_ret str_ret = lzma_str_from_filters( + &bhi->filter_chain, filters, + LZMA_STR_DECODER | LZMA_STR_GETOPT_LONG, NULL); + + // Free the memory allocated by lzma_block_header_decode(). + lzma_filters_free(filters, NULL); + + // Check if the stringification succeeded. + if (str_ret != LZMA_OK) { + message_error("%s: %s", pair->src_name, message_strm(str_ret)); + return true; + } + + return false; + +data_error: + // Show the error message. + message_error("%s: %s", pair->src_name, + message_strm(LZMA_DATA_ERROR)); + return true; +} + + +/// \brief Parse the Check field and put it into check_value[] +/// +/// \return False on success, true on error. +static bool +parse_check_value(file_pair *pair, const lzma_index_iter *iter) +{ + // Don't read anything from the file if there is no integrity Check. + if (iter->stream.flags->check == LZMA_CHECK_NONE) { + snprintf(check_value, sizeof(check_value), "---"); + return false; + } + + // Locate and read the Check field. + const uint32_t size = lzma_check_size(iter->stream.flags->check); + const uint64_t offset = iter->block.compressed_file_offset + + iter->block.total_size - size; + io_buf buf; + if (io_pread(pair, &buf, size, offset)) + return true; + + // CRC32 and CRC64 are in little endian. Guess that all the future + // 32-bit and 64-bit Check values are little endian too. It shouldn't + // be a too big problem if this guess is wrong. + if (size == 4) + snprintf(check_value, sizeof(check_value), + "%08" PRIx32, conv32le(buf.u32[0])); + else if (size == 8) + snprintf(check_value, sizeof(check_value), + "%016" PRIx64, conv64le(buf.u64[0])); + else + for (size_t i = 0; i < size; ++i) + snprintf(check_value + i * 2, 3, "%02x", buf.u8[i]); + + return false; +} + + +/// \brief Parse detailed information about a Block +/// +/// Since this requires seek(s), listing information about all Blocks can +/// be slow. +/// +/// \param pair Input file +/// \param iter Location of the Block whose Check value should +/// be printed. +/// \param bhi Pointer to structure where to store the information +/// about the Block Header field. +/// +/// \return False on success, true on error. If an error occurs, +/// the error message is printed too so the caller doesn't +/// need to worry about that. +static bool +parse_details(file_pair *pair, const lzma_index_iter *iter, + block_header_info *bhi, xz_file_info *xfi) +{ + if (parse_block_header(pair, iter, bhi, xfi)) + return true; + + if (parse_check_value(pair, iter)) + return true; + + return false; +} + + +/// \brief Get the compression ratio +/// +/// This has slightly different format than that is used in message.c. +static const char * +get_ratio(uint64_t compressed_size, uint64_t uncompressed_size) +{ + if (uncompressed_size == 0) + return "---"; + + const double ratio = (double)(compressed_size) + / (double)(uncompressed_size); + if (ratio > 9.999) + return "---"; + + static char buf[16]; + snprintf(buf, sizeof(buf), "%.3f", ratio); + return buf; +} + + +/// \brief Get a comma-separated list of Check names +/// +/// The check names are translated with gettext except when in robot mode. +/// +/// \param buf Buffer to hold the resulting string +/// \param checks Bit mask of Checks to print +/// \param space_after_comma +/// It's better to not use spaces in table-like listings, +/// but in more verbose formats a space after a comma +/// is good for readability. +static void +get_check_names(char buf[CHECKS_STR_SIZE], + uint32_t checks, bool space_after_comma) +{ + // If we get called when there are no Checks to print, set checks + // to 1 so that we print "None". This can happen in the robot mode + // when printing the totals line if there are no valid input files. + if (checks == 0) + checks = 1; + + char *pos = buf; + size_t left = CHECKS_STR_SIZE; + + const char *sep = space_after_comma ? ", " : ","; + bool comma = false; + + for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) { + if (checks & (UINT32_C(1) << i)) { + my_snprintf(&pos, &left, "%s%s", + comma ? sep : "", + opt_robot ? check_names[i] + : _(check_names[i])); + comma = true; + } + } + + return; +} + + +static bool +print_info_basic(const xz_file_info *xfi, file_pair *pair) +{ + static bool headings_displayed = false; + if (!headings_displayed) { + headings_displayed = true; + // TRANSLATORS: These are column headings. From Strms (Streams) + // to Ratio, the columns are right aligned. Check and Filename + // are left aligned. If you need longer words, it's OK to + // use two lines here. Test with "xz -l foo.xz". + puts(_("Strms Blocks Compressed Uncompressed Ratio " + "Check Filename")); + } + + char checks[CHECKS_STR_SIZE]; + get_check_names(checks, lzma_index_checks(xfi->idx), false); + + const char *cols[7] = { + uint64_to_str(lzma_index_stream_count(xfi->idx), 0), + uint64_to_str(lzma_index_block_count(xfi->idx), 1), + uint64_to_nicestr(lzma_index_file_size(xfi->idx), + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr(lzma_index_uncompressed_size(xfi->idx), + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(lzma_index_file_size(xfi->idx), + lzma_index_uncompressed_size(xfi->idx)), + checks, + pair->src_name, + }; + printf("%*s %*s %*s %*s %*s %-*s %s\n", + tuklib_mbstr_fw(cols[0], 5), cols[0], + tuklib_mbstr_fw(cols[1], 7), cols[1], + tuklib_mbstr_fw(cols[2], 11), cols[2], + tuklib_mbstr_fw(cols[3], 11), cols[3], + tuklib_mbstr_fw(cols[4], 5), cols[4], + tuklib_mbstr_fw(cols[5], 7), cols[5], + cols[6]); + + return false; +} + + +static void +print_adv_helper(uint64_t stream_count, uint64_t block_count, + uint64_t compressed_size, uint64_t uncompressed_size, + uint32_t checks, uint64_t stream_padding) +{ + char checks_str[CHECKS_STR_SIZE]; + get_check_names(checks_str, checks, true); + + printf(" %-*s %s\n", COLON_STR(COLON_STR_STREAMS), + uint64_to_str(stream_count, 0)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_BLOCKS), + uint64_to_str(block_count, 0)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_COMPRESSED_SIZE), + uint64_to_nicestr(compressed_size, + NICESTR_B, NICESTR_TIB, true, 0)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_UNCOMPRESSED_SIZE), + uint64_to_nicestr(uncompressed_size, + NICESTR_B, NICESTR_TIB, true, 0)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_RATIO), + get_ratio(compressed_size, uncompressed_size)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_CHECK), checks_str); + printf(" %-*s %s\n", COLON_STR(COLON_STR_STREAM_PADDING), + uint64_to_nicestr(stream_padding, + NICESTR_B, NICESTR_TIB, true, 0)); + return; +} + + +static bool +print_info_adv(xz_file_info *xfi, file_pair *pair) +{ + // Print the overall information. + print_adv_helper(lzma_index_stream_count(xfi->idx), + lzma_index_block_count(xfi->idx), + lzma_index_file_size(xfi->idx), + lzma_index_uncompressed_size(xfi->idx), + lzma_index_checks(xfi->idx), + xfi->stream_padding); + + // Size of the biggest Check. This is used to calculate the width + // of the CheckVal field. The table would get insanely wide if + // we always reserved space for 64-byte Check (128 chars as hex). + uint32_t check_max = 0; + + // Print information about the Streams. + // + // All except Check are right aligned; Check is left aligned. + // Test with "xz -lv foo.xz". + printf(" %s\n %*s %*s %*s %*s %*s %*s %*s %-*s %*s\n", + _(colon_strs[COLON_STR_STREAMS]), + HEADING_STR(HEADING_STREAM), + HEADING_STR(HEADING_BLOCKS), + HEADING_STR(HEADING_COMPOFFSET), + HEADING_STR(HEADING_UNCOMPOFFSET), + HEADING_STR(HEADING_COMPSIZE), + HEADING_STR(HEADING_UNCOMPSIZE), + HEADING_STR(HEADING_RATIO), + HEADING_STR(HEADING_CHECK), + HEADING_STR(HEADING_PADDING)); + + lzma_index_iter iter; + lzma_index_iter_init(&iter, xfi->idx); + + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) { + const char *cols1[4] = { + uint64_to_str(iter.stream.number, 0), + uint64_to_str(iter.stream.block_count, 1), + uint64_to_str(iter.stream.compressed_offset, 2), + uint64_to_str(iter.stream.uncompressed_offset, 3), + }; + printf(" %*s %*s %*s %*s ", + tuklib_mbstr_fw(cols1[0], + headings[HEADING_STREAM].columns), + cols1[0], + tuklib_mbstr_fw(cols1[1], + headings[HEADING_BLOCKS].columns), + cols1[1], + tuklib_mbstr_fw(cols1[2], + headings[HEADING_COMPOFFSET].columns), + cols1[2], + tuklib_mbstr_fw(cols1[3], + headings[HEADING_UNCOMPOFFSET].columns), + cols1[3]); + + const char *cols2[5] = { + uint64_to_str(iter.stream.compressed_size, 0), + uint64_to_str(iter.stream.uncompressed_size, 1), + get_ratio(iter.stream.compressed_size, + iter.stream.uncompressed_size), + _(check_names[iter.stream.flags->check]), + uint64_to_str(iter.stream.padding, 2), + }; + printf("%*s %*s %*s %-*s %*s\n", + tuklib_mbstr_fw(cols2[0], + headings[HEADING_COMPSIZE].columns), + cols2[0], + tuklib_mbstr_fw(cols2[1], + headings[HEADING_UNCOMPSIZE].columns), + cols2[1], + tuklib_mbstr_fw(cols2[2], + headings[HEADING_RATIO].columns), + cols2[2], + tuklib_mbstr_fw(cols2[3], + headings[HEADING_CHECK].columns), + cols2[3], + tuklib_mbstr_fw(cols2[4], + headings[HEADING_PADDING].columns), + cols2[4]); + + // Update the maximum Check size. + if (lzma_check_size(iter.stream.flags->check) > check_max) + check_max = lzma_check_size(iter.stream.flags->check); + } + + // Cache the verbosity level to a local variable. + const bool detailed = message_verbosity_get() >= V_DEBUG; + + // Print information about the Blocks but only if there is + // at least one Block. + if (lzma_index_block_count(xfi->idx) > 0) { + // Calculate the width of the CheckVal column. This can be + // used as is as the field width for printf() when printing + // the actual check value as it is hexadecimal. However, to + // print the column heading, further calculation is needed + // to handle a translated string (it's done a few lines later). + assert(check_max <= LZMA_CHECK_SIZE_MAX); + const int checkval_width = my_max( + headings[HEADING_CHECKVAL].columns, + (int)(2 * check_max)); + + // All except Check are right aligned; Check is left aligned. + printf(" %s\n %*s %*s %*s %*s %*s %*s %*s %-*s", + _(colon_strs[COLON_STR_BLOCKS]), + HEADING_STR(HEADING_STREAM), + HEADING_STR(HEADING_BLOCK), + HEADING_STR(HEADING_COMPOFFSET), + HEADING_STR(HEADING_UNCOMPOFFSET), + HEADING_STR(HEADING_TOTALSIZE), + HEADING_STR(HEADING_UNCOMPSIZE), + HEADING_STR(HEADING_RATIO), + detailed ? headings[HEADING_CHECK].fw : 1, + _(headings[HEADING_CHECK].str)); + + if (detailed) { + // CheckVal (Check value), Flags, and Filters are + // left aligned. Block Header Size, CompSize, and + // MemUsage are right aligned. Test with + // "xz -lvv foo.xz". + printf(" %-*s %*s %-*s %*s %*s %s", + headings[HEADING_CHECKVAL].fw + + checkval_width + - headings[HEADING_CHECKVAL].columns, + _(headings[HEADING_CHECKVAL].str), + HEADING_STR(HEADING_HEADERSIZE), + HEADING_STR(HEADING_HEADERFLAGS), + HEADING_STR(HEADING_COMPSIZE), + HEADING_STR(HEADING_MEMUSAGE), + _(headings[HEADING_FILTERS].str)); + } + + putchar('\n'); + + lzma_index_iter_init(&iter, xfi->idx); + + // Iterate over the Blocks. + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + // If in detailed mode, collect the information from + // Block Header before starting to print the next line. + block_header_info bhi = BLOCK_HEADER_INFO_INIT; + if (detailed && parse_details(pair, &iter, &bhi, xfi)) + return true; + + const char *cols1[4] = { + uint64_to_str(iter.stream.number, 0), + uint64_to_str( + iter.block.number_in_stream, 1), + uint64_to_str( + iter.block.compressed_file_offset, 2), + uint64_to_str( + iter.block.uncompressed_file_offset, 3) + }; + printf(" %*s %*s %*s %*s ", + tuklib_mbstr_fw(cols1[0], + headings[HEADING_STREAM].columns), + cols1[0], + tuklib_mbstr_fw(cols1[1], + headings[HEADING_BLOCK].columns), + cols1[1], + tuklib_mbstr_fw(cols1[2], + headings[HEADING_COMPOFFSET].columns), + cols1[2], + tuklib_mbstr_fw(cols1[3], headings[ + HEADING_UNCOMPOFFSET].columns), + cols1[3]); + + const char *cols2[4] = { + uint64_to_str(iter.block.total_size, 0), + uint64_to_str(iter.block.uncompressed_size, + 1), + get_ratio(iter.block.total_size, + iter.block.uncompressed_size), + _(check_names[iter.stream.flags->check]) + }; + printf("%*s %*s %*s %-*s", + tuklib_mbstr_fw(cols2[0], + headings[HEADING_TOTALSIZE].columns), + cols2[0], + tuklib_mbstr_fw(cols2[1], + headings[HEADING_UNCOMPSIZE].columns), + cols2[1], + tuklib_mbstr_fw(cols2[2], + headings[HEADING_RATIO].columns), + cols2[2], + tuklib_mbstr_fw(cols2[3], detailed + ? headings[HEADING_CHECK].columns : 1), + cols2[3]); + + if (detailed) { + const lzma_vli compressed_size + = iter.block.unpadded_size + - bhi.header_size + - lzma_check_size( + iter.stream.flags->check); + + const char *cols3[6] = { + check_value, + uint64_to_str(bhi.header_size, 0), + bhi.flags, + uint64_to_str(compressed_size, 1), + uint64_to_str( + round_up_to_mib(bhi.memusage), + 2), + bhi.filter_chain + }; + // Show MiB for memory usage, because it + // is the only size which is not in bytes. + printf(" %-*s %*s %-*s %*s %*s MiB %s", + checkval_width, cols3[0], + tuklib_mbstr_fw(cols3[1], headings[ + HEADING_HEADERSIZE].columns), + cols3[1], + tuklib_mbstr_fw(cols3[2], headings[ + HEADING_HEADERFLAGS].columns), + cols3[2], + tuklib_mbstr_fw(cols3[3], headings[ + HEADING_COMPSIZE].columns), + cols3[3], + tuklib_mbstr_fw(cols3[4], headings[ + HEADING_MEMUSAGE].columns - 4), + cols3[4], + cols3[5]); + } + + putchar('\n'); + block_header_info_end(&bhi); + } + } + + if (detailed) { + printf(" %-*s %s MiB\n", COLON_STR(COLON_STR_MEMORY_NEEDED), + uint64_to_str( + round_up_to_mib(xfi->memusage_max), 0)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_SIZES_IN_HEADERS), + xfi->all_have_sizes ? _("Yes") : _("No")); + //printf(" %-*s %s\n", COLON_STR(COLON_STR_MINIMUM_XZ_VERSION), + printf(_(" Minimum XZ Utils version: %s\n"), + xz_ver_to_str(xfi->min_version)); + } + + return false; +} + + +static bool +print_info_robot(xz_file_info *xfi, file_pair *pair) +{ + char checks[CHECKS_STR_SIZE]; + get_check_names(checks, lzma_index_checks(xfi->idx), false); + + printf("name\t%s\n", pair->src_name); + + printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%" PRIu64 "\n", + lzma_index_stream_count(xfi->idx), + lzma_index_block_count(xfi->idx), + lzma_index_file_size(xfi->idx), + lzma_index_uncompressed_size(xfi->idx), + get_ratio(lzma_index_file_size(xfi->idx), + lzma_index_uncompressed_size(xfi->idx)), + checks, + xfi->stream_padding); + + if (message_verbosity_get() >= V_VERBOSE) { + lzma_index_iter iter; + lzma_index_iter_init(&iter, xfi->idx); + + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) + printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%" PRIu64 "\n", + iter.stream.number, + iter.stream.block_count, + iter.stream.compressed_offset, + iter.stream.uncompressed_offset, + iter.stream.compressed_size, + iter.stream.uncompressed_size, + get_ratio(iter.stream.compressed_size, + iter.stream.uncompressed_size), + check_names[iter.stream.flags->check], + iter.stream.padding); + + lzma_index_iter_rewind(&iter); + + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + block_header_info bhi = BLOCK_HEADER_INFO_INIT; + if (message_verbosity_get() >= V_DEBUG + && parse_details( + pair, &iter, &bhi, xfi)) + return true; + + printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 + "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s", + iter.stream.number, + iter.block.number_in_stream, + iter.block.number_in_file, + iter.block.compressed_file_offset, + iter.block.uncompressed_file_offset, + iter.block.total_size, + iter.block.uncompressed_size, + get_ratio(iter.block.total_size, + iter.block.uncompressed_size), + check_names[iter.stream.flags->check]); + + if (message_verbosity_get() >= V_DEBUG) + printf("\t%s\t%" PRIu32 "\t%s\t%" PRIu64 + "\t%" PRIu64 "\t%s", + check_value, + bhi.header_size, + bhi.flags, + bhi.compressed_size, + bhi.memusage, + bhi.filter_chain); + + putchar('\n'); + block_header_info_end(&bhi); + } + } + + if (message_verbosity_get() >= V_DEBUG) + printf("summary\t%" PRIu64 "\t%s\t%" PRIu32 "\n", + xfi->memusage_max, + xfi->all_have_sizes ? "yes" : "no", + xfi->min_version); + + return false; +} + + +static void +update_totals(const xz_file_info *xfi) +{ + // TODO: Integer overflow checks + ++totals.files; + totals.streams += lzma_index_stream_count(xfi->idx); + totals.blocks += lzma_index_block_count(xfi->idx); + totals.compressed_size += lzma_index_file_size(xfi->idx); + totals.uncompressed_size += lzma_index_uncompressed_size(xfi->idx); + totals.stream_padding += xfi->stream_padding; + totals.checks |= lzma_index_checks(xfi->idx); + + if (totals.memusage_max < xfi->memusage_max) + totals.memusage_max = xfi->memusage_max; + + if (totals.min_version < xfi->min_version) + totals.min_version = xfi->min_version; + + totals.all_have_sizes &= xfi->all_have_sizes; + + return; +} + + +static void +print_totals_basic(void) +{ + // Print a separator line. + char line[80]; + memset(line, '-', sizeof(line)); + line[sizeof(line) - 1] = '\0'; + puts(line); + + // Get the check names. + char checks[CHECKS_STR_SIZE]; + get_check_names(checks, totals.checks, false); + + // Print the totals except the file count, which needs + // special handling. + printf("%5s %7s %11s %11s %5s %-7s ", + uint64_to_str(totals.streams, 0), + uint64_to_str(totals.blocks, 1), + uint64_to_nicestr(totals.compressed_size, + NICESTR_B, NICESTR_TIB, false, 2), + uint64_to_nicestr(totals.uncompressed_size, + NICESTR_B, NICESTR_TIB, false, 3), + get_ratio(totals.compressed_size, + totals.uncompressed_size), + checks); + + // Since we print totals only when there are at least two files, + // the English message will always use "%s files". But some other + // languages need different forms for different plurals so we + // have to translate this with ngettext(). + // + // TRANSLATORS: %s is an integer. Only the plural form of this + // message is used (e.g. "2 files"). Test with "xz -l foo.xz bar.xz". + printf(ngettext("%s file\n", "%s files\n", + totals.files <= ULONG_MAX ? totals.files + : (totals.files % 1000000) + 1000000), + uint64_to_str(totals.files, 0)); + + return; +} + + +static void +print_totals_adv(void) +{ + putchar('\n'); + puts(_("Totals:")); + printf(" %-*s %s\n", COLON_STR(COLON_STR_NUMBER_OF_FILES), + uint64_to_str(totals.files, 0)); + print_adv_helper(totals.streams, totals.blocks, + totals.compressed_size, totals.uncompressed_size, + totals.checks, totals.stream_padding); + + if (message_verbosity_get() >= V_DEBUG) { + printf(" %-*s %s MiB\n", COLON_STR(COLON_STR_MEMORY_NEEDED), + uint64_to_str( + round_up_to_mib(totals.memusage_max), 0)); + printf(" %-*s %s\n", COLON_STR(COLON_STR_SIZES_IN_HEADERS), + totals.all_have_sizes ? _("Yes") : _("No")); + //printf(" %-*s %s\n", COLON_STR(COLON_STR_MINIMUM_XZ_VERSION), + printf(_(" Minimum XZ Utils version: %s\n"), + xz_ver_to_str(totals.min_version)); + } + + return; +} + + +static void +print_totals_robot(void) +{ + char checks[CHECKS_STR_SIZE]; + get_check_names(checks, totals.checks, false); + + printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 + "\t%s\t%s\t%" PRIu64 "\t%" PRIu64, + totals.streams, + totals.blocks, + totals.compressed_size, + totals.uncompressed_size, + get_ratio(totals.compressed_size, + totals.uncompressed_size), + checks, + totals.stream_padding, + totals.files); + + if (message_verbosity_get() >= V_DEBUG) + printf("\t%" PRIu64 "\t%s\t%" PRIu32, + totals.memusage_max, + totals.all_have_sizes ? "yes" : "no", + totals.min_version); + + putchar('\n'); + + return; +} + + +extern void +list_totals(void) +{ + if (opt_robot) { + // Always print totals in --robot mode. It can be convenient + // in some cases and doesn't complicate usage of the + // single-file case much. + print_totals_robot(); + + } else if (totals.files > 1) { + // For non-robot mode, totals are printed only if there + // is more than one file. + if (message_verbosity_get() <= V_WARNING) + print_totals_basic(); + else + print_totals_adv(); + } + + return; +} + + +extern void +list_file(const char *filename) +{ + if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO) + message_fatal(_("--list works only on .xz files " + "(--format=xz or --format=auto)")); + + message_filename(filename); + + if (filename == stdin_filename) { + message_error(_("--list does not support reading from " + "standard input")); + return; + } + + init_field_widths(); + + // Unset opt_stdout so that io_open_src() won't accept special files. + // Set opt_force so that io_open_src() will follow symlinks. + opt_stdout = false; + opt_force = true; + file_pair *pair = io_open_src(filename); + if (pair == NULL) + return; + + xz_file_info xfi = XZ_FILE_INFO_INIT; + if (!parse_indexes(&xfi, pair)) { + bool fail; + + // We have three main modes: + // - --robot, which has submodes if --verbose is specified + // once or twice + // - Normal --list without --verbose + // - --list with one or two --verbose + if (opt_robot) + fail = print_info_robot(&xfi, pair); + else if (message_verbosity_get() <= V_WARNING) + fail = print_info_basic(&xfi, pair); + else + fail = print_info_adv(&xfi, pair); + + // Update the totals that are displayed after all + // the individual files have been listed. Don't count + // broken files. + if (!fail) + update_totals(&xfi); + + lzma_index_end(xfi.idx, NULL); + } + + io_close(pair, false); + return; +} diff --git a/src/xz/list.h b/src/xz/list.h new file mode 100644 index 0000000..a4c6ec7 --- /dev/null +++ b/src/xz/list.h @@ -0,0 +1,18 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file list.h +/// \brief List information about .xz files +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief List information about the given .xz file +extern void list_file(const char *filename); + + +/// \brief Show the totals after all files have been listed +extern void list_totals(void); diff --git a/src/xz/main.c b/src/xz/main.c new file mode 100644 index 0000000..c9c3dec --- /dev/null +++ b/src/xz/main.c @@ -0,0 +1,344 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.c +/// \brief main() +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include <ctype.h> + +/// Exit status to use. This can be changed with set_exit_status(). +static enum exit_status_type exit_status = E_SUCCESS; + +#if defined(_WIN32) && !defined(__CYGWIN__) +/// exit_status has to be protected with a critical section due to +/// how "signal handling" is done on Windows. See signals.c for details. +static CRITICAL_SECTION exit_status_cs; +#endif + +/// True if --no-warn is specified. When this is true, we don't set +/// the exit status to E_WARNING when something worth a warning happens. +static bool no_warn = false; + + +extern void +set_exit_status(enum exit_status_type new_status) +{ + assert(new_status == E_WARNING || new_status == E_ERROR); + +#if defined(_WIN32) && !defined(__CYGWIN__) + EnterCriticalSection(&exit_status_cs); +#endif + + if (exit_status != E_ERROR) + exit_status = new_status; + +#if defined(_WIN32) && !defined(__CYGWIN__) + LeaveCriticalSection(&exit_status_cs); +#endif + + return; +} + + +extern void +set_exit_no_warn(void) +{ + no_warn = true; + return; +} + + +static const char * +read_name(const args_info *args) +{ + // FIXME: Maybe we should have some kind of memory usage limit here + // like the tool has for the actual compression and decompression. + // Giving some huge text file with --files0 makes us to read the + // whole file in RAM. + static char *name = NULL; + static size_t size = 256; + + // Allocate the initial buffer. This is never freed, since after it + // is no longer needed, the program exits very soon. It is safe to + // use xmalloc() and xrealloc() in this function, because while + // executing this function, no files are open for writing, and thus + // there's no need to cleanup anything before exiting. + if (name == NULL) + name = xmalloc(size); + + // Write position in name + size_t pos = 0; + + // Read one character at a time into name. + while (!user_abort) { + const int c = fgetc(args->files_file); + + if (ferror(args->files_file)) { + // Take care of EINTR since we have established + // the signal handlers already. + if (errno == EINTR) + continue; + + message_error(_("%s: Error reading filenames: %s"), + args->files_name, strerror(errno)); + return NULL; + } + + if (feof(args->files_file)) { + if (pos != 0) + message_error(_("%s: Unexpected end of input " + "when reading filenames"), + args->files_name); + + return NULL; + } + + if (c == args->files_delim) { + // We allow consecutive newline (--files) or '\0' + // characters (--files0), and ignore such empty + // filenames. + if (pos == 0) + continue; + + // A non-empty name was read. Terminate it with '\0' + // and return it. + name[pos] = '\0'; + return name; + } + + if (c == '\0') { + // A null character was found when using --files, + // which expects plain text input separated with + // newlines. + message_error(_("%s: Null character found when " + "reading filenames; maybe you meant " + "to use `--files0' instead " + "of `--files'?"), args->files_name); + return NULL; + } + + name[pos++] = c; + + // Allocate more memory if needed. There must always be space + // at least for one character to allow terminating the string + // with '\0'. + if (pos == size) { + size *= 2; + name = xrealloc(name, size); + } + } + + return NULL; +} + + +int +main(int argc, char **argv) +{ +#ifdef HAVE_PLEDGE + // OpenBSD's pledge(2) sandbox + // + // Unconditionally enable sandboxing with fairly relaxed promises. + // This is still way better than having no sandbox at all. :-) + // More strict promises will be made later in file_io.c if possible. + if (pledge("stdio rpath wpath cpath fattr", "")) { + // Don't translate the string or use message_fatal() as + // those haven't been initialized yet. + fprintf(stderr, "%s: Failed to enable the sandbox\n", argv[0]); + return E_ERROR; + } +#endif + +#if defined(_WIN32) && !defined(__CYGWIN__) + InitializeCriticalSection(&exit_status_cs); +#endif + + // Set up the progname variable. + tuklib_progname_init(argv); + + // Initialize the file I/O. This makes sure that + // stdin, stdout, and stderr are something valid. + io_init(); + + // Set up the locale and message translations. + tuklib_gettext_init(PACKAGE, LOCALEDIR); + + // Initialize handling of error/warning/other messages. + message_init(); + + // Set hardware-dependent default values. These can be overridden + // on the command line, thus this must be done before args_parse(). + hardware_init(); + + // Parse the command line arguments and get an array of filenames. + // This doesn't return if something is wrong with the command line + // arguments. If there are no arguments, one filename ("-") is still + // returned to indicate stdin. + args_info args; + args_parse(&args, argc, argv); + + if (opt_mode != MODE_LIST && opt_robot) + message_fatal(_("Compression and decompression with --robot " + "are not supported yet.")); + + // Tell the message handling code how many input files there are if + // we know it. This way the progress indicator can show it. + if (args.files_name != NULL) + message_set_files(0); + else + message_set_files(args.arg_count); + + // Refuse to write compressed data to standard output if it is + // a terminal. + if (opt_mode == MODE_COMPRESS) { + if (opt_stdout || (args.arg_count == 1 + && strcmp(args.arg_names[0], "-") == 0)) { + if (is_tty_stdout()) { + message_try_help(); + tuklib_exit(E_ERROR, E_ERROR, false); + } + } + } + + // Set up the signal handlers. We don't need these before we + // start the actual action and not in --list mode, so this is + // done after parsing the command line arguments. + // + // It's good to keep signal handlers in normal compression and + // decompression modes even when only writing to stdout, because + // we might need to restore O_APPEND flag on stdout before exiting. + // In --test mode, signal handlers aren't really needed, but let's + // keep them there for consistency with normal decompression. + if (opt_mode != MODE_LIST) + signals_init(); + +#ifdef ENABLE_SANDBOX + // Set a flag that sandboxing is allowed if all these are true: + // - --files or --files0 wasn't used. + // - There is exactly one input file or we are reading from stdin. + // - We won't create any files: output goes to stdout or --test + // or --list was used. Note that --test implies opt_stdout = true + // but --list doesn't. + // + // This is obviously not ideal but it was easy to implement and + // it covers the most common use cases. + // + // TODO: Make sandboxing work for other situations too. + if (args.files_name == NULL && args.arg_count == 1 + && (opt_stdout || strcmp("-", args.arg_names[0]) == 0 + || opt_mode == MODE_LIST)) + io_allow_sandbox(); +#endif + + // coder_run() handles compression, decompression, and testing. + // list_file() is for --list. + void (*run)(const char *filename) = &coder_run; +#ifdef HAVE_DECODERS + if (opt_mode == MODE_LIST) + run = &list_file; +#endif + + // Process the files given on the command line. Note that if no names + // were given, args_parse() gave us a fake "-" filename. + for (unsigned i = 0; i < args.arg_count && !user_abort; ++i) { + if (strcmp("-", args.arg_names[i]) == 0) { + // Processing from stdin to stdout. Check that we + // aren't writing compressed data to a terminal or + // reading it from a terminal. + if (opt_mode == MODE_COMPRESS) { + if (is_tty_stdout()) + continue; + } else if (is_tty_stdin()) { + continue; + } + + // It doesn't make sense to compress data from stdin + // if we are supposed to read filenames from stdin + // too (enabled with --files or --files0). + if (args.files_name == stdin_filename) { + message_error(_("Cannot read data from " + "standard input when " + "reading filenames " + "from standard input")); + continue; + } + + // Replace the "-" with a special pointer, which is + // recognized by coder_run() and other things. + // This way error messages get a proper filename + // string and the code still knows that it is + // handling the special case of stdin. + args.arg_names[i] = (char *)stdin_filename; + } + + // Do the actual compression or decompression. + run(args.arg_names[i]); + } + + // If --files or --files0 was used, process the filenames from the + // given file or stdin. Note that here we don't consider "-" to + // indicate stdin like we do with the command line arguments. + if (args.files_name != NULL) { + // read_name() checks for user_abort so we don't need to + // check it as loop termination condition. + while (true) { + const char *name = read_name(&args); + if (name == NULL) + break; + + // read_name() doesn't return empty names. + assert(name[0] != '\0'); + run(name); + } + + if (args.files_name != stdin_filename) + (void)fclose(args.files_file); + } + +#ifdef HAVE_DECODERS + // All files have now been handled. If in --list mode, display + // the totals before exiting. We don't have signal handlers + // enabled in --list mode, so we don't need to check user_abort. + if (opt_mode == MODE_LIST) { + assert(!user_abort); + list_totals(); + } +#endif + +#ifndef NDEBUG + coder_free(); + args_free(); +#endif + + // If we have got a signal, raise it to kill the program instead + // of calling tuklib_exit(). + signals_exit(); + + // Make a local copy of exit_status to keep the Windows code + // thread safe. At this point it is fine if we miss the user + // pressing C-c and don't set the exit_status to E_ERROR on + // Windows. +#if defined(_WIN32) && !defined(__CYGWIN__) + EnterCriticalSection(&exit_status_cs); +#endif + + enum exit_status_type es = exit_status; + +#if defined(_WIN32) && !defined(__CYGWIN__) + LeaveCriticalSection(&exit_status_cs); +#endif + + // Suppress the exit status indicating a warning if --no-warn + // was specified. + if (es == E_WARNING && no_warn) + es = E_SUCCESS; + + tuklib_exit((int)es, E_ERROR, message_verbosity_get() != V_SILENT); +} diff --git a/src/xz/main.h b/src/xz/main.h new file mode 100644 index 0000000..323f2f7 --- /dev/null +++ b/src/xz/main.h @@ -0,0 +1,30 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file main.h +/// \brief Miscellaneous declarations +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// Possible exit status values. These are the same as used by gzip and bzip2. +enum exit_status_type { + E_SUCCESS = 0, + E_ERROR = 1, + E_WARNING = 2, +}; + + +/// Sets the exit status after a warning or error has occurred. If new_status +/// is E_WARNING and the old exit status was already E_ERROR, the exit +/// status is not changed. +extern void set_exit_status(enum exit_status_type new_status); + + +/// Use E_SUCCESS instead of E_WARNING if something worth a warning occurs +/// but nothing worth an error has occurred. This is called when --no-warn +/// is specified. +extern void set_exit_no_warn(void); diff --git a/src/xz/message.c b/src/xz/message.c new file mode 100644 index 0000000..c54ebc5 --- /dev/null +++ b/src/xz/message.c @@ -0,0 +1,1146 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.c +/// \brief Printing messages +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#include <stdarg.h> + + +/// Number of the current file +static unsigned int files_pos = 0; + +/// Total number of input files; zero if unknown. +static unsigned int files_total; + +/// Verbosity level +static enum message_verbosity verbosity = V_WARNING; + +/// Filename which we will print with the verbose messages +static const char *filename; + +/// True once the a filename has been printed to stderr as part of progress +/// message. If automatic progress updating isn't enabled, this becomes true +/// after the first progress message has been printed due to user sending +/// SIGINFO, SIGUSR1, or SIGALRM. Once this variable is true, we will print +/// an empty line before the next filename to make the output more readable. +static bool first_filename_printed = false; + +/// This is set to true when we have printed the current filename to stderr +/// as part of a progress message. This variable is useful only if not +/// updating progress automatically: if user sends many SIGINFO, SIGUSR1, or +/// SIGALRM signals, we won't print the name of the same file multiple times. +static bool current_filename_printed = false; + +/// True if we should print progress indicator and update it automatically +/// if also verbose >= V_VERBOSE. +static bool progress_automatic; + +/// True if message_progress_start() has been called but +/// message_progress_end() hasn't been called yet. +static bool progress_started = false; + +/// This is true when a progress message was printed and the cursor is still +/// on the same line with the progress message. In that case, a newline has +/// to be printed before any error messages. +static bool progress_active = false; + +/// Pointer to lzma_stream used to do the encoding or decoding. +static lzma_stream *progress_strm; + +/// This is true if we are in passthru mode (not actually compressing or +/// decompressing) and thus cannot use lzma_get_progress(progress_strm, ...). +/// That is, we are using coder_passthru() in coder.c. +static bool progress_is_from_passthru; + +/// Expected size of the input stream is needed to show completion percentage +/// and estimate remaining time. +static uint64_t expected_in_size; + + +// Use alarm() and SIGALRM when they are supported. This has two minor +// advantages over the alternative of polling gettimeofday(): +// - It is possible for the user to send SIGINFO, SIGUSR1, or SIGALRM to +// get intermediate progress information even when --verbose wasn't used +// or stderr is not a terminal. +// - alarm() + SIGALRM seems to have slightly less overhead than polling +// gettimeofday(). +#ifdef SIGALRM + +const int message_progress_sigs[] = { + SIGALRM, +#ifdef SIGINFO + SIGINFO, +#endif +#ifdef SIGUSR1 + SIGUSR1, +#endif + 0 +}; + +/// The signal handler for SIGALRM sets this to true. It is set back to false +/// once the progress message has been updated. +static volatile sig_atomic_t progress_needs_updating = false; + +/// Signal handler for SIGALRM +static void +progress_signal_handler(int sig lzma_attribute((__unused__))) +{ + progress_needs_updating = true; + return; +} + +#else + +/// This is true when progress message printing is wanted. Using the same +/// variable name as above to avoid some ifdefs. +static bool progress_needs_updating = false; + +/// Elapsed time when the next progress message update should be done. +static uint64_t progress_next_update; + +#endif + + +extern void +message_init(void) +{ + // If --verbose is used, we use a progress indicator if and only + // if stderr is a terminal. If stderr is not a terminal, we print + // verbose information only after finishing the file. As a special + // exception, even if --verbose was not used, user can send SIGALRM + // to make us print progress information once without automatic + // updating. + progress_automatic = isatty(STDERR_FILENO); + + // Commented out because COLUMNS is rarely exported to environment. + // Most users have at least 80 columns anyway, let's think something + // fancy here if enough people complain. +/* + if (progress_automatic) { + // stderr is a terminal. Check the COLUMNS environment + // variable to see if the terminal is wide enough. If COLUMNS + // doesn't exist or it has some unparsable value, we assume + // that the terminal is wide enough. + const char *columns_str = getenv("COLUMNS"); + if (columns_str != NULL) { + char *endptr; + const long columns = strtol(columns_str, &endptr, 10); + if (*endptr != '\0' || columns < 80) + progress_automatic = false; + } + } +*/ + +#ifdef SIGALRM + // Establish the signal handlers which set a flag to tell us that + // progress info should be updated. + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + sa.sa_handler = &progress_signal_handler; + + for (size_t i = 0; message_progress_sigs[i] != 0; ++i) + if (sigaction(message_progress_sigs[i], &sa, NULL)) + message_signal_handler(); +#endif + + return; +} + + +extern void +message_verbosity_increase(void) +{ + if (verbosity < V_DEBUG) + ++verbosity; + + return; +} + + +extern void +message_verbosity_decrease(void) +{ + if (verbosity > V_SILENT) + --verbosity; + + return; +} + + +extern enum message_verbosity +message_verbosity_get(void) +{ + return verbosity; +} + + +extern void +message_set_files(unsigned int files) +{ + files_total = files; + return; +} + + +/// Prints the name of the current file if it hasn't been printed already, +/// except if we are processing exactly one stream from stdin to stdout. +/// I think it looks nicer to not print "(stdin)" when --verbose is used +/// in a pipe and no other files are processed. +static void +print_filename(void) +{ + if (!opt_robot && (files_total != 1 || filename != stdin_filename)) { + signals_block(); + + FILE *file = opt_mode == MODE_LIST ? stdout : stderr; + + // If a file was already processed, put an empty line + // before the next filename to improve readability. + if (first_filename_printed) + fputc('\n', file); + + first_filename_printed = true; + current_filename_printed = true; + + // If we don't know how many files there will be due + // to usage of --files or --files0. + if (files_total == 0) + fprintf(file, "%s (%u)\n", filename, + files_pos); + else + fprintf(file, "%s (%u/%u)\n", filename, + files_pos, files_total); + + signals_unblock(); + } + + return; +} + + +extern void +message_filename(const char *src_name) +{ + // Start numbering the files starting from one. + ++files_pos; + filename = src_name; + + if (verbosity >= V_VERBOSE + && (progress_automatic || opt_mode == MODE_LIST)) + print_filename(); + else + current_filename_printed = false; + + return; +} + + +extern void +message_progress_start(lzma_stream *strm, bool is_passthru, uint64_t in_size) +{ + // Store the pointer to the lzma_stream used to do the coding. + // It is needed to find out the position in the stream. + progress_strm = strm; + progress_is_from_passthru = is_passthru; + + // Store the expected size of the file. If we aren't printing any + // statistics, then is will be unused. But since it is possible + // that the user sends us a signal to show statistics, we need + // to have it available anyway. + expected_in_size = in_size; + + // Indicate that progress info may need to be printed before + // printing error messages. + progress_started = true; + + // If progress indicator is wanted, print the filename and possibly + // the file count now. + if (verbosity >= V_VERBOSE && progress_automatic) { + // Start the timer to display the first progress message + // after one second. An alternative would be to show the + // first message almost immediately, but delaying by one + // second looks better to me, since extremely early + // progress info is pretty much useless. +#ifdef SIGALRM + // First disable a possibly existing alarm. + alarm(0); + progress_needs_updating = false; + alarm(1); +#else + progress_needs_updating = true; + progress_next_update = 1000; +#endif + } + + return; +} + + +/// Make the string indicating completion percentage. +static const char * +progress_percentage(uint64_t in_pos) +{ + // If the size of the input file is unknown or the size told us is + // clearly wrong since we have processed more data than the alleged + // size of the file, show a static string indicating that we have + // no idea of the completion percentage. + if (expected_in_size == 0 || in_pos > expected_in_size) + return "--- %"; + + // Never show 100.0 % before we actually are finished. + double percentage = (double)(in_pos) / (double)(expected_in_size) + * 99.9; + + // Use big enough buffer to hold e.g. a multibyte decimal point. + static char buf[16]; + snprintf(buf, sizeof(buf), "%.1f %%", percentage); + + return buf; +} + + +/// Make the string containing the amount of input processed, amount of +/// output produced, and the compression ratio. +static const char * +progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final) +{ + // Use big enough buffer to hold e.g. a multibyte thousand separators. + static char buf[128]; + char *pos = buf; + size_t left = sizeof(buf); + + // Print the sizes. If this the final message, use more reasonable + // units than MiB if the file was small. + const enum nicestr_unit unit_min = final ? NICESTR_B : NICESTR_MIB; + my_snprintf(&pos, &left, "%s / %s", + uint64_to_nicestr(compressed_pos, + unit_min, NICESTR_TIB, false, 0), + uint64_to_nicestr(uncompressed_pos, + unit_min, NICESTR_TIB, false, 1)); + + // Avoid division by zero. If we cannot calculate the ratio, set + // it to some nice number greater than 10.0 so that it gets caught + // in the next if-clause. + const double ratio = uncompressed_pos > 0 + ? (double)(compressed_pos) / (double)(uncompressed_pos) + : 16.0; + + // If the ratio is very bad, just indicate that it is greater than + // 9.999. This way the length of the ratio field stays fixed. + if (ratio > 9.999) + snprintf(pos, left, " > %.3f", 9.999); + else + snprintf(pos, left, " = %.3f", ratio); + + return buf; +} + + +/// Make the string containing the processing speed of uncompressed data. +static const char * +progress_speed(uint64_t uncompressed_pos, uint64_t elapsed) +{ + // Don't print the speed immediately, since the early values look + // somewhat random. + if (elapsed < 3000) + return ""; + + // The first character of KiB/s, MiB/s, or GiB/s: + static const char unit[] = { 'K', 'M', 'G' }; + + size_t unit_index = 0; + + // Calculate the speed as KiB/s. + double speed = (double)(uncompressed_pos) + / ((double)(elapsed) * (1024.0 / 1000.0)); + + // Adjust the unit of the speed if needed. + while (speed > 999.0) { + speed /= 1024.0; + if (++unit_index == ARRAY_SIZE(unit)) + return ""; // Way too fast ;-) + } + + // Use decimal point only if the number is small. Examples: + // - 0.1 KiB/s + // - 9.9 KiB/s + // - 99 KiB/s + // - 999 KiB/s + // Use big enough buffer to hold e.g. a multibyte decimal point. + static char buf[16]; + snprintf(buf, sizeof(buf), "%.*f %ciB/s", + speed > 9.9 ? 0 : 1, speed, unit[unit_index]); + return buf; +} + + +/// Make a string indicating elapsed time. The format is either +/// M:SS or H:MM:SS depending on if the time is an hour or more. +static const char * +progress_time(uint64_t mseconds) +{ + // 9999 hours = 416 days + static char buf[sizeof("9999:59:59")]; + + // 32-bit variable is enough for elapsed time (136 years). + uint32_t seconds = (uint32_t)(mseconds / 1000); + + // Don't show anything if the time is zero or ridiculously big. + if (seconds == 0 || seconds > ((9999 * 60) + 59) * 60 + 59) + return ""; + + uint32_t minutes = seconds / 60; + seconds %= 60; + + if (minutes >= 60) { + const uint32_t hours = minutes / 60; + minutes %= 60; + snprintf(buf, sizeof(buf), + "%" PRIu32 ":%02" PRIu32 ":%02" PRIu32, + hours, minutes, seconds); + } else { + snprintf(buf, sizeof(buf), "%" PRIu32 ":%02" PRIu32, + minutes, seconds); + } + + return buf; +} + + +/// Return a string containing estimated remaining time when +/// reasonably possible. +static const char * +progress_remaining(uint64_t in_pos, uint64_t elapsed) +{ + // Don't show the estimated remaining time when it wouldn't + // make sense: + // - Input size is unknown. + // - Input has grown bigger since we started (de)compressing. + // - We haven't processed much data yet, so estimate would be + // too inaccurate. + // - Only a few seconds has passed since we started (de)compressing, + // so estimate would be too inaccurate. + if (expected_in_size == 0 || in_pos > expected_in_size + || in_pos < (UINT64_C(1) << 19) || elapsed < 8000) + return ""; + + // Calculate the estimate. Don't give an estimate of zero seconds, + // since it is possible that all the input has been already passed + // to the library, but there is still quite a bit of output pending. + uint32_t remaining = (uint32_t)((double)(expected_in_size - in_pos) + * ((double)(elapsed) / 1000.0) / (double)(in_pos)); + if (remaining < 1) + remaining = 1; + + static char buf[sizeof("9 h 55 min")]; + + // Select appropriate precision for the estimated remaining time. + if (remaining <= 10) { + // A maximum of 10 seconds remaining. + // Show the number of seconds as is. + snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); + + } else if (remaining <= 50) { + // A maximum of 50 seconds remaining. + // Round up to the next multiple of five seconds. + remaining = (remaining + 4) / 5 * 5; + snprintf(buf, sizeof(buf), "%" PRIu32 " s", remaining); + + } else if (remaining <= 590) { + // A maximum of 9 minutes and 50 seconds remaining. + // Round up to the next multiple of ten seconds. + remaining = (remaining + 9) / 10 * 10; + snprintf(buf, sizeof(buf), "%" PRIu32 " min %" PRIu32 " s", + remaining / 60, remaining % 60); + + } else if (remaining <= 59 * 60) { + // A maximum of 59 minutes remaining. + // Round up to the next multiple of a minute. + remaining = (remaining + 59) / 60; + snprintf(buf, sizeof(buf), "%" PRIu32 " min", remaining); + + } else if (remaining <= 9 * 3600 + 50 * 60) { + // A maximum of 9 hours and 50 minutes left. + // Round up to the next multiple of ten minutes. + remaining = (remaining + 599) / 600 * 10; + snprintf(buf, sizeof(buf), "%" PRIu32 " h %" PRIu32 " min", + remaining / 60, remaining % 60); + + } else if (remaining <= 23 * 3600) { + // A maximum of 23 hours remaining. + // Round up to the next multiple of an hour. + remaining = (remaining + 3599) / 3600; + snprintf(buf, sizeof(buf), "%" PRIu32 " h", remaining); + + } else if (remaining <= 9 * 24 * 3600 + 23 * 3600) { + // A maximum of 9 days and 23 hours remaining. + // Round up to the next multiple of an hour. + remaining = (remaining + 3599) / 3600; + snprintf(buf, sizeof(buf), "%" PRIu32 " d %" PRIu32 " h", + remaining / 24, remaining % 24); + + } else if (remaining <= 999 * 24 * 3600) { + // A maximum of 999 days remaining. ;-) + // Round up to the next multiple of a day. + remaining = (remaining + 24 * 3600 - 1) / (24 * 3600); + snprintf(buf, sizeof(buf), "%" PRIu32 " d", remaining); + + } else { + // The estimated remaining time is too big. Don't show it. + return ""; + } + + return buf; +} + + +/// Get how much uncompressed and compressed data has been processed. +static void +progress_pos(uint64_t *in_pos, + uint64_t *compressed_pos, uint64_t *uncompressed_pos) +{ + uint64_t out_pos; + if (progress_is_from_passthru) { + // In passthru mode the progress info is in total_in/out but + // the *progress_strm itself isn't initialized and thus we + // cannot use lzma_get_progress(). + *in_pos = progress_strm->total_in; + out_pos = progress_strm->total_out; + } else { + lzma_get_progress(progress_strm, in_pos, &out_pos); + } + + // It cannot have processed more input than it has been given. + assert(*in_pos <= progress_strm->total_in); + + // It cannot have produced more output than it claims to have ready. + assert(out_pos >= progress_strm->total_out); + + if (opt_mode == MODE_COMPRESS) { + *compressed_pos = out_pos; + *uncompressed_pos = *in_pos; + } else { + *compressed_pos = *in_pos; + *uncompressed_pos = out_pos; + } + + return; +} + + +extern void +message_progress_update(void) +{ + if (!progress_needs_updating) + return; + + // Calculate how long we have been processing this file. + const uint64_t elapsed = mytime_get_elapsed(); + +#ifndef SIGALRM + if (progress_next_update > elapsed) + return; + + progress_next_update = elapsed + 1000; +#endif + + // Get our current position in the stream. + uint64_t in_pos; + uint64_t compressed_pos; + uint64_t uncompressed_pos; + progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); + + // Block signals so that fprintf() doesn't get interrupted. + signals_block(); + + // Print the filename if it hasn't been printed yet. + if (!current_filename_printed) + print_filename(); + + // Print the actual progress message. The idea is that there is at + // least three spaces between the fields in typical situations, but + // even in rare situations there is at least one space. + const char *cols[5] = { + progress_percentage(in_pos), + progress_sizes(compressed_pos, uncompressed_pos, false), + progress_speed(uncompressed_pos, elapsed), + progress_time(elapsed), + progress_remaining(in_pos, elapsed), + }; + fprintf(stderr, "\r %*s %*s %*s %10s %10s\r", + tuklib_mbstr_fw(cols[0], 6), cols[0], + tuklib_mbstr_fw(cols[1], 35), cols[1], + tuklib_mbstr_fw(cols[2], 9), cols[2], + cols[3], + cols[4]); + +#ifdef SIGALRM + // Updating the progress info was finished. Reset + // progress_needs_updating to wait for the next SIGALRM. + // + // NOTE: This has to be done before alarm(1) or with (very) bad + // luck we could be setting this to false after the alarm has already + // been triggered. + progress_needs_updating = false; + + if (verbosity >= V_VERBOSE && progress_automatic) { + // Mark that the progress indicator is active, so if an error + // occurs, the error message gets printed cleanly. + progress_active = true; + + // Restart the timer so that progress_needs_updating gets + // set to true after about one second. + alarm(1); + } else { + // The progress message was printed because user had sent us + // SIGALRM. In this case, each progress message is printed + // on its own line. + fputc('\n', stderr); + } +#else + // When SIGALRM isn't supported and we get here, it's always due to + // automatic progress update. We set progress_active here too like + // described above. + assert(verbosity >= V_VERBOSE); + assert(progress_automatic); + progress_active = true; +#endif + + signals_unblock(); + + return; +} + + +static void +progress_flush(bool finished) +{ + if (!progress_started || verbosity < V_VERBOSE) + return; + + uint64_t in_pos; + uint64_t compressed_pos; + uint64_t uncompressed_pos; + progress_pos(&in_pos, &compressed_pos, &uncompressed_pos); + + // Avoid printing intermediate progress info if some error occurs + // in the beginning of the stream. (If something goes wrong later in + // the stream, it is sometimes useful to tell the user where the + // error approximately occurred, especially if the error occurs + // after a time-consuming operation.) + if (!finished && !progress_active + && (compressed_pos == 0 || uncompressed_pos == 0)) + return; + + progress_active = false; + + const uint64_t elapsed = mytime_get_elapsed(); + + signals_block(); + + // When using the auto-updating progress indicator, the final + // statistics are printed in the same format as the progress + // indicator itself. + if (progress_automatic) { + const char *cols[5] = { + finished ? "100 %" : progress_percentage(in_pos), + progress_sizes(compressed_pos, uncompressed_pos, true), + progress_speed(uncompressed_pos, elapsed), + progress_time(elapsed), + finished ? "" : progress_remaining(in_pos, elapsed), + }; + fprintf(stderr, "\r %*s %*s %*s %10s %10s\n", + tuklib_mbstr_fw(cols[0], 6), cols[0], + tuklib_mbstr_fw(cols[1], 35), cols[1], + tuklib_mbstr_fw(cols[2], 9), cols[2], + cols[3], + cols[4]); + } else { + // The filename is always printed. + fprintf(stderr, "%s: ", filename); + + // Percentage is printed only if we didn't finish yet. + if (!finished) { + // Don't print the percentage when it isn't known + // (starts with a dash). + const char *percentage = progress_percentage(in_pos); + if (percentage[0] != '-') + fprintf(stderr, "%s, ", percentage); + } + + // Size information is always printed. + fprintf(stderr, "%s", progress_sizes( + compressed_pos, uncompressed_pos, true)); + + // The speed and elapsed time aren't always shown. + const char *speed = progress_speed(uncompressed_pos, elapsed); + if (speed[0] != '\0') + fprintf(stderr, ", %s", speed); + + const char *elapsed_str = progress_time(elapsed); + if (elapsed_str[0] != '\0') + fprintf(stderr, ", %s", elapsed_str); + + fputc('\n', stderr); + } + + signals_unblock(); + + return; +} + + +extern void +message_progress_end(bool success) +{ + assert(progress_started); + progress_flush(success); + progress_started = false; + return; +} + + +static void +vmessage(enum message_verbosity v, const char *fmt, va_list ap) +{ + if (v <= verbosity) { + signals_block(); + + progress_flush(false); + + // TRANSLATORS: This is the program name in the beginning + // of the line in messages. Usually it becomes "xz: ". + // This is a translatable string because French needs + // a space before a colon. + fprintf(stderr, _("%s: "), progname); + +#ifdef __clang__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif + vfprintf(stderr, fmt, ap); +#ifdef __clang__ +# pragma GCC diagnostic pop +#endif + + fputc('\n', stderr); + + signals_unblock(); + } + + return; +} + + +extern void +message(enum message_verbosity v, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(v, fmt, ap); + va_end(ap); + return; +} + + +extern void +message_warning(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_WARNING, fmt, ap); + va_end(ap); + + set_exit_status(E_WARNING); + return; +} + + +extern void +message_error(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + set_exit_status(E_ERROR); + return; +} + + +extern void +message_fatal(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vmessage(V_ERROR, fmt, ap); + va_end(ap); + + tuklib_exit(E_ERROR, E_ERROR, false); +} + + +extern void +message_bug(void) +{ + message_fatal(_("Internal error (bug)")); +} + + +extern void +message_signal_handler(void) +{ + message_fatal(_("Cannot establish signal handlers")); +} + + +extern const char * +message_strm(lzma_ret code) +{ + switch (code) { + case LZMA_NO_CHECK: + return _("No integrity check; not verifying file integrity"); + + case LZMA_UNSUPPORTED_CHECK: + return _("Unsupported type of integrity check; " + "not verifying file integrity"); + + case LZMA_MEM_ERROR: + return strerror(ENOMEM); + + case LZMA_MEMLIMIT_ERROR: + return _("Memory usage limit reached"); + + case LZMA_FORMAT_ERROR: + return _("File format not recognized"); + + case LZMA_OPTIONS_ERROR: + return _("Unsupported options"); + + case LZMA_DATA_ERROR: + return _("Compressed data is corrupt"); + + case LZMA_BUF_ERROR: + return _("Unexpected end of input"); + + case LZMA_OK: + case LZMA_STREAM_END: + case LZMA_GET_CHECK: + case LZMA_PROG_ERROR: + case LZMA_SEEK_NEEDED: + case LZMA_RET_INTERNAL1: + case LZMA_RET_INTERNAL2: + case LZMA_RET_INTERNAL3: + case LZMA_RET_INTERNAL4: + case LZMA_RET_INTERNAL5: + case LZMA_RET_INTERNAL6: + case LZMA_RET_INTERNAL7: + case LZMA_RET_INTERNAL8: + // Without "default", compiler will warn if new constants + // are added to lzma_ret, it is not too easy to forget to + // add the new constants to this function. + break; + } + + return _("Internal error (bug)"); +} + + +extern void +message_mem_needed(enum message_verbosity v, uint64_t memusage) +{ + if (v > verbosity) + return; + + // Convert memusage to MiB, rounding up to the next full MiB. + // This way the user can always use the displayed usage as + // the new memory usage limit. (If we rounded to the nearest, + // the user might need to +1 MiB to get high enough limit.) + memusage = round_up_to_mib(memusage); + + uint64_t memlimit = hardware_memlimit_get(opt_mode); + + // Handle the case when there is no memory usage limit. + // This way we don't print a weird message with a huge number. + if (memlimit == UINT64_MAX) { + message(v, _("%s MiB of memory is required. " + "The limiter is disabled."), + uint64_to_str(memusage, 0)); + return; + } + + // With US-ASCII: + // 2^64 with thousand separators + " MiB" suffix + '\0' = 26 + 4 + 1 + // But there may be multibyte chars so reserve enough space. + char memlimitstr[128]; + + // Show the memory usage limit as MiB unless it is less than 1 MiB. + // This way it's easy to notice errors where one has typed + // --memory=123 instead of --memory=123MiB. + if (memlimit < (UINT32_C(1) << 20)) { + snprintf(memlimitstr, sizeof(memlimitstr), "%s B", + uint64_to_str(memlimit, 1)); + } else { + // Round up just like with memusage. If this function is + // called for informational purposes (to just show the + // current usage and limit), we should never show that + // the usage is higher than the limit, which would give + // a false impression that the memory usage limit isn't + // properly enforced. + snprintf(memlimitstr, sizeof(memlimitstr), "%s MiB", + uint64_to_str(round_up_to_mib(memlimit), 1)); + } + + message(v, _("%s MiB of memory is required. The limit is %s."), + uint64_to_str(memusage, 0), memlimitstr); + + return; +} + + +extern void +message_filters_show(enum message_verbosity v, const lzma_filter *filters) +{ + if (v > verbosity) + return; + + char *buf; + const lzma_ret ret = lzma_str_from_filters(&buf, filters, + LZMA_STR_ENCODER | LZMA_STR_GETOPT_LONG, NULL); + if (ret != LZMA_OK) + message_fatal("%s", message_strm(ret)); + + fprintf(stderr, _("%s: Filter chain: %s\n"), progname, buf); + free(buf); + return; +} + + +extern void +message_try_help(void) +{ + // Print this with V_WARNING instead of V_ERROR to prevent it from + // showing up when --quiet has been specified. + message(V_WARNING, _("Try `%s --help' for more information."), + progname); + return; +} + + +extern void +message_version(void) +{ + // It is possible that liblzma version is different than the command + // line tool version, so print both. + if (opt_robot) { + printf("XZ_VERSION=%" PRIu32 "\nLIBLZMA_VERSION=%" PRIu32 "\n", + LZMA_VERSION, lzma_version_number()); + } else { + printf("xz (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"); + printf("liblzma %s\n", lzma_version_string()); + } + + tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); +} + + +extern void +message_help(bool long_help) +{ + printf(_("Usage: %s [OPTION]... [FILE]...\n" + "Compress or decompress FILEs in the .xz format.\n\n"), + progname); + + // NOTE: The short help doesn't currently have options that + // take arguments. + if (long_help) + puts(_("Mandatory arguments to long options are mandatory " + "for short options too.\n")); + + if (long_help) + puts(_(" Operation mode:\n")); + + puts(_( +" -z, --compress force compression\n" +" -d, --decompress force decompression\n" +" -t, --test test compressed file integrity\n" +" -l, --list list information about .xz files")); + + if (long_help) + puts(_("\n Operation modifiers:\n")); + + puts(_( +" -k, --keep keep (don't delete) input files\n" +" -f, --force force overwrite of output file and (de)compress links\n" +" -c, --stdout write to standard output and don't delete input files")); + // NOTE: --to-stdout isn't included above because it's not + // the recommended spelling. It was copied from gzip but other + // compressors with gzip-like syntax don't support it. + + if (long_help) { + puts(_( +" --single-stream decompress only the first stream, and silently\n" +" ignore possible remaining input data")); + puts(_( +" --no-sparse do not create sparse files when decompressing\n" +" -S, --suffix=.SUF use the suffix `.SUF' on compressed files\n" +" --files[=FILE] read filenames to process from FILE; if FILE is\n" +" omitted, filenames are read from the standard input;\n" +" filenames must be terminated with the newline character\n" +" --files0[=FILE] like --files but use the null character as terminator")); + } + + if (long_help) { + puts(_("\n Basic file format and compression options:\n")); + puts(_( +" -F, --format=FMT file format to encode or decode; possible values are\n" +" `auto' (default), `xz', `lzma', `lzip', and `raw'\n" +" -C, --check=CHECK integrity check type: `none' (use with caution),\n" +" `crc32', `crc64' (default), or `sha256'")); + puts(_( +" --ignore-check don't verify the integrity check when decompressing")); + } + + puts(_( +" -0 ... -9 compression preset; default is 6; take compressor *and*\n" +" decompressor memory usage into account before using 7-9!")); + + puts(_( +" -e, --extreme try to improve compression ratio by using more CPU time;\n" +" does not affect decompressor memory requirements")); + + puts(_( +" -T, --threads=NUM use at most NUM threads; the default is 1; set to 0\n" +" to use as many threads as there are processor cores")); + + if (long_help) { + puts(_( +" --block-size=SIZE\n" +" start a new .xz block after every SIZE bytes of input;\n" +" use this to set the block size for threaded compression")); + puts(_( +" --block-list=SIZES\n" +" start a new .xz block after the given comma-separated\n" +" intervals of uncompressed data")); + puts(_( +" --flush-timeout=TIMEOUT\n" +" when compressing, if more than TIMEOUT milliseconds has\n" +" passed since the previous flush and reading more input\n" +" would block, all pending data is flushed out" + )); + puts(_( // xgettext:no-c-format +" --memlimit-compress=LIMIT\n" +" --memlimit-decompress=LIMIT\n" +" --memlimit-mt-decompress=LIMIT\n" +" -M, --memlimit=LIMIT\n" +" set memory usage limit for compression, decompression,\n" +" threaded decompression, or all of these; LIMIT is in\n" +" bytes, % of RAM, or 0 for defaults")); + + puts(_( +" --no-adjust if compression settings exceed the memory usage limit,\n" +" give an error instead of adjusting the settings downwards")); + } + + if (long_help) { + puts(_( +"\n Custom filter chain for compression (alternative for using presets):")); + +#if defined(HAVE_ENCODER_LZMA1) || defined(HAVE_DECODER_LZMA1) \ + || defined(HAVE_ENCODER_LZMA2) || defined(HAVE_DECODER_LZMA2) + // TRANSLATORS: The word "literal" in "literal context bits" + // means how many "context bits" to use when encoding + // literals. A literal is a single 8-bit byte. It doesn't + // mean "literally" here. + puts(_( +"\n" +" --lzma1[=OPTS] LZMA1 or LZMA2; OPTS is a comma-separated list of zero or\n" +" --lzma2[=OPTS] more of the following options (valid values; default):\n" +" preset=PRE reset options to a preset (0-9[e])\n" +" dict=NUM dictionary size (4KiB - 1536MiB; 8MiB)\n" +" lc=NUM number of literal context bits (0-4; 3)\n" +" lp=NUM number of literal position bits (0-4; 0)\n" +" pb=NUM number of position bits (0-4; 2)\n" +" mode=MODE compression mode (fast, normal; normal)\n" +" nice=NUM nice length of a match (2-273; 64)\n" +" mf=NAME match finder (hc3, hc4, bt2, bt3, bt4; bt4)\n" +" depth=NUM maximum search depth; 0=automatic (default)")); +#endif + + puts(_( +"\n" +" --x86[=OPTS] x86 BCJ filter (32-bit and 64-bit)\n" +" --arm[=OPTS] ARM BCJ filter\n" +" --armthumb[=OPTS] ARM-Thumb BCJ filter\n" +" --arm64[=OPTS] ARM64 BCJ filter\n" +" --powerpc[=OPTS] PowerPC BCJ filter (big endian only)\n" +" --ia64[=OPTS] IA-64 (Itanium) BCJ filter\n" +" --sparc[=OPTS] SPARC BCJ filter\n" +" Valid OPTS for all BCJ filters:\n" +" start=NUM start offset for conversions (default=0)")); + +#if defined(HAVE_ENCODER_DELTA) || defined(HAVE_DECODER_DELTA) + puts(_( +"\n" +" --delta[=OPTS] Delta filter; valid OPTS (valid values; default):\n" +" dist=NUM distance between bytes being subtracted\n" +" from each other (1-256; 1)")); +#endif + } + + if (long_help) + puts(_("\n Other options:\n")); + + puts(_( +" -q, --quiet suppress warnings; specify twice to suppress errors too\n" +" -v, --verbose be verbose; specify twice for even more verbose")); + + if (long_help) { + puts(_( +" -Q, --no-warn make warnings not affect the exit status")); + puts(_( +" --robot use machine-parsable messages (useful for scripts)")); + puts(""); + puts(_( +" --info-memory display the total amount of RAM and the currently active\n" +" memory usage limits, and exit")); + puts(_( +" -h, --help display the short help (lists only the basic options)\n" +" -H, --long-help display this long help and exit")); + } else { + puts(_( +" -h, --help display this short help and exit\n" +" -H, --long-help display the long help (lists also the advanced options)")); + } + + puts(_( +" -V, --version display the version number and exit")); + + puts(_("\nWith no FILE, or when FILE is -, read standard input.\n")); + + // TRANSLATORS: This message indicates the bug reporting address + // for this package. Please add _another line_ saying + // "Report translation bugs to <...>\n" with the email or WWW + // address for translation bugs. Thanks. + printf(_("Report bugs to <%s> (in English or Finnish).\n"), + PACKAGE_BUGREPORT); + printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); + +#if LZMA_VERSION_STABILITY != LZMA_VERSION_STABILITY_STABLE + puts(_( +"THIS IS A DEVELOPMENT VERSION NOT INTENDED FOR PRODUCTION USE.")); +#endif + + tuklib_exit(E_SUCCESS, E_ERROR, verbosity != V_SILENT); +} diff --git a/src/xz/message.h b/src/xz/message.h new file mode 100644 index 0000000..b264f82 --- /dev/null +++ b/src/xz/message.h @@ -0,0 +1,152 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file message.h +/// \brief Printing messages to stderr +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// Verbosity levels +enum message_verbosity { + V_SILENT, ///< No messages + V_ERROR, ///< Only error messages + V_WARNING, ///< Errors and warnings + V_VERBOSE, ///< Errors, warnings, and verbose statistics + V_DEBUG, ///< Very verbose +}; + + +/// \brief Signals used for progress message handling +extern const int message_progress_sigs[]; + + +/// \brief Initializes the message functions +/// +/// If an error occurs, this function doesn't return. +/// +extern void message_init(void); + + +/// Increase verbosity level by one step unless it was at maximum. +extern void message_verbosity_increase(void); + +/// Decrease verbosity level by one step unless it was at minimum. +extern void message_verbosity_decrease(void); + +/// Get the current verbosity level. +extern enum message_verbosity message_verbosity_get(void); + + +/// \brief Print a message if verbosity level is at least "verbosity" +/// +/// This doesn't touch the exit status. +extern void message(enum message_verbosity verbosity, const char *fmt, ...) + lzma_attribute((__format__(__printf__, 2, 3))); + + +/// \brief Prints a warning and possibly sets exit status +/// +/// The message is printed only if verbosity level is at least V_WARNING. +/// The exit status is set to WARNING unless it was already at ERROR. +extern void message_warning(const char *fmt, ...) + lzma_attribute((__format__(__printf__, 1, 2))); + + +/// \brief Prints an error message and sets exit status +/// +/// The message is printed only if verbosity level is at least V_ERROR. +/// The exit status is set to ERROR. +extern void message_error(const char *fmt, ...) + lzma_attribute((__format__(__printf__, 1, 2))); + + +/// \brief Prints an error message and exits with EXIT_ERROR +/// +/// The message is printed only if verbosity level is at least V_ERROR. +extern void message_fatal(const char *fmt, ...) + lzma_attribute((__format__(__printf__, 1, 2))) + lzma_attribute((__noreturn__)); + + +/// Print an error message that an internal error occurred and exit with +/// EXIT_ERROR. +extern void message_bug(void) lzma_attribute((__noreturn__)); + + +/// Print a message that establishing signal handlers failed, and exit with +/// exit status ERROR. +extern void message_signal_handler(void) lzma_attribute((__noreturn__)); + + +/// Convert lzma_ret to a string. +extern const char *message_strm(lzma_ret code); + + +/// Display how much memory was needed and how much the limit was. +extern void message_mem_needed(enum message_verbosity v, uint64_t memusage); + + +/// Print the filter chain. +extern void message_filters_show( + enum message_verbosity v, const lzma_filter *filters); + + +/// Print a message that user should try --help. +extern void message_try_help(void); + + +/// Prints the version number to stdout and exits with exit status SUCCESS. +extern void message_version(void) lzma_attribute((__noreturn__)); + + +/// Print the help message. +extern void message_help(bool long_help) lzma_attribute((__noreturn__)); + + +/// \brief Set the total number of files to be processed +/// +/// Standard input is counted as a file here. This is used when printing +/// the filename via message_filename(). +extern void message_set_files(unsigned int files); + + +/// \brief Set the name of the current file and possibly print it too +/// +/// The name is printed immediately if --list was used or if --verbose +/// was used and stderr is a terminal. Even when the filename isn't printed, +/// it is stored so that it can be printed later if needed for progress +/// messages. +extern void message_filename(const char *src_name); + + +/// \brief Start progress info handling +/// +/// message_filename() must be called before this function to set +/// the filename. +/// +/// This must be paired with a call to message_progress_end() before the +/// given *strm becomes invalid. +/// +/// \param strm Pointer to lzma_stream used for the coding. +/// \param in_size Size of the input file, or zero if unknown. +/// +extern void message_progress_start(lzma_stream *strm, + bool is_passthru, uint64_t in_size); + + +/// Update the progress info if in verbose mode and enough time has passed +/// since the previous update. This can be called only when +/// message_progress_start() has already been used. +extern void message_progress_update(void); + + +/// \brief Finishes the progress message if we were in verbose mode +/// +/// \param finished True if the whole stream was successfully coded +/// and output written to the output stream. +/// +extern void message_progress_end(bool finished); diff --git a/src/xz/mytime.c b/src/xz/mytime.c new file mode 100644 index 0000000..7e8a074 --- /dev/null +++ b/src/xz/mytime.c @@ -0,0 +1,86 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file mytime.c +/// \brief Time handling functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#if defined(HAVE_CLOCK_GETTIME) && defined(HAVE_CLOCK_MONOTONIC) +# include <time.h> +#else +# include <sys/time.h> +#endif + +uint64_t opt_flush_timeout = 0; + +static uint64_t start_time; +static uint64_t next_flush; + + +/// \brief Get the current time as milliseconds +/// +/// It's relative to some point but not necessarily to the UNIX Epoch. +static uint64_t +mytime_now(void) +{ +#if defined(HAVE_CLOCK_GETTIME) && defined(HAVE_CLOCK_MONOTONIC) + // If CLOCK_MONOTONIC was available at compile time but for some + // reason isn't at runtime, fallback to CLOCK_REALTIME which + // according to POSIX is mandatory for all implementations. + static clockid_t clk_id = CLOCK_MONOTONIC; + struct timespec tv; + while (clock_gettime(clk_id, &tv)) + clk_id = CLOCK_REALTIME; + + return (uint64_t)tv.tv_sec * 1000 + (uint64_t)(tv.tv_nsec / 1000000); +#else + struct timeval tv; + gettimeofday(&tv, NULL); + return (uint64_t)tv.tv_sec * 1000 + (uint64_t)(tv.tv_usec / 1000); +#endif +} + + +extern void +mytime_set_start_time(void) +{ + start_time = mytime_now(); + return; +} + + +extern uint64_t +mytime_get_elapsed(void) +{ + return mytime_now() - start_time; +} + + +extern void +mytime_set_flush_time(void) +{ + next_flush = mytime_now() + opt_flush_timeout; + return; +} + + +extern int +mytime_get_flush_timeout(void) +{ + if (opt_flush_timeout == 0 || opt_mode != MODE_COMPRESS) + return -1; + + const uint64_t now = mytime_now(); + if (now >= next_flush) + return 0; + + const uint64_t remaining = next_flush - now; + return remaining > INT_MAX ? INT_MAX : (int)remaining; +} diff --git a/src/xz/mytime.h b/src/xz/mytime.h new file mode 100644 index 0000000..a7be2aa --- /dev/null +++ b/src/xz/mytime.h @@ -0,0 +1,43 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file mytime.h +/// \brief Time handling functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + + +/// \brief Number of milliseconds to between LZMA_SYNC_FLUSHes +/// +/// If 0, timed flushing is disabled. Otherwise if no more input is available +/// and not at the end of the file and at least opt_flush_timeout milliseconds +/// has elapsed since the start of compression or the previous flushing +/// (LZMA_SYNC_FLUSH or LZMA_FULL_FLUSH), set LZMA_SYNC_FLUSH to flush +/// the pending data. +extern uint64_t opt_flush_timeout; + + +/// \brief Store the time when (de)compression was started +/// +/// The start time is also stored as the time of the first flush. +extern void mytime_set_start_time(void); + + +/// \brief Get the number of milliseconds since the operation started +extern uint64_t mytime_get_elapsed(void); + + +/// \brief Store the time of when compressor was flushed +extern void mytime_set_flush_time(void); + + +/// \brief Get the number of milliseconds until the next flush +/// +/// This returns -1 if no timed flushing is used. +/// +/// The return value is intended for use with poll(). +extern int mytime_get_flush_timeout(void); diff --git a/src/xz/options.c b/src/xz/options.c new file mode 100644 index 0000000..b434b0c --- /dev/null +++ b/src/xz/options.c @@ -0,0 +1,358 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file options.c +/// \brief Parser for filter-specific options +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +/////////////////// +// Generic stuff // +/////////////////// + +typedef struct { + const char *name; + uint64_t id; +} name_id_map; + + +typedef struct { + const char *name; + const name_id_map *map; + uint64_t min; + uint64_t max; +} option_map; + + +/// Parses option=value pairs that are separated with commas: +/// opt=val,opt=val,opt=val +/// +/// Each option is a string, that is converted to an integer using the +/// index where the option string is in the array. +/// +/// Value can be +/// - a string-id map mapping a list of possible string values to integers +/// (opts[i].map != NULL, opts[i].min and opts[i].max are ignored); +/// - a number with minimum and maximum value limit +/// (opts[i].map == NULL && opts[i].min != UINT64_MAX); +/// - a string that will be parsed by the filter-specific code +/// (opts[i].map == NULL && opts[i].min == UINT64_MAX, opts[i].max ignored) +/// +/// When parsing both option and value succeed, a filter-specific function +/// is called, which should update the given value to filter-specific +/// options structure. +/// +/// \param str String containing the options from the command line +/// \param opts Filter-specific option map +/// \param set Filter-specific function to update filter_options +/// \param filter_options Pointer to filter-specific options structure +/// +/// \return Returns only if no errors occur. +/// +static void +parse_options(const char *str, const option_map *opts, + void (*set)(void *filter_options, + unsigned key, uint64_t value, const char *valuestr), + void *filter_options) +{ + if (str == NULL || str[0] == '\0') + return; + + char *s = xstrdup(str); + char *name = s; + + while (*name != '\0') { + if (*name == ',') { + ++name; + continue; + } + + char *split = strchr(name, ','); + if (split != NULL) + *split = '\0'; + + char *value = strchr(name, '='); + if (value != NULL) + *value++ = '\0'; + + if (value == NULL || value[0] == '\0') + message_fatal(_("%s: Options must be `name=value' " + "pairs separated with commas"), str); + + // Look for the option name from the option map. + unsigned i = 0; + while (true) { + if (opts[i].name == NULL) + message_fatal(_("%s: Invalid option name"), + name); + + if (strcmp(name, opts[i].name) == 0) + break; + + ++i; + } + + // Option was found from the map. See how we should handle it. + if (opts[i].map != NULL) { + // value is a string which we should map + // to an integer. + unsigned j; + for (j = 0; opts[i].map[j].name != NULL; ++j) { + if (strcmp(opts[i].map[j].name, value) == 0) + break; + } + + if (opts[i].map[j].name == NULL) + message_fatal(_("%s: Invalid option value"), + value); + + set(filter_options, i, opts[i].map[j].id, value); + + } else if (opts[i].min == UINT64_MAX) { + // value is a special string that will be + // parsed by set(). + set(filter_options, i, 0, value); + + } else { + // value is an integer. + const uint64_t v = str_to_uint64(name, value, + opts[i].min, opts[i].max); + set(filter_options, i, v, value); + } + + // Check if it was the last option. + if (split == NULL) + break; + + name = split + 1; + } + + free(s); + return; +} + + +/////////// +// Delta // +/////////// + +enum { + OPT_DIST, +}; + + +static void +set_delta(void *options, unsigned key, uint64_t value, + const char *valuestr lzma_attribute((__unused__))) +{ + lzma_options_delta *opt = options; + switch (key) { + case OPT_DIST: + opt->dist = value; + break; + } +} + + +extern lzma_options_delta * +options_delta(const char *str) +{ + static const option_map opts[] = { + { "dist", NULL, LZMA_DELTA_DIST_MIN, + LZMA_DELTA_DIST_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_delta *options = xmalloc(sizeof(lzma_options_delta)); + *options = (lzma_options_delta){ + // It's hard to give a useful default for this. + .type = LZMA_DELTA_TYPE_BYTE, + .dist = LZMA_DELTA_DIST_MIN, + }; + + parse_options(str, opts, &set_delta, options); + + return options; +} + + +///////// +// BCJ // +///////// + +enum { + OPT_START_OFFSET, +}; + + +static void +set_bcj(void *options, unsigned key, uint64_t value, + const char *valuestr lzma_attribute((__unused__))) +{ + lzma_options_bcj *opt = options; + switch (key) { + case OPT_START_OFFSET: + opt->start_offset = value; + break; + } +} + + +extern lzma_options_bcj * +options_bcj(const char *str) +{ + static const option_map opts[] = { + { "start", NULL, 0, UINT32_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_bcj *options = xmalloc(sizeof(lzma_options_bcj)); + *options = (lzma_options_bcj){ + .start_offset = 0, + }; + + parse_options(str, opts, &set_bcj, options); + + return options; +} + + +////////// +// LZMA // +////////// + +enum { + OPT_PRESET, + OPT_DICT, + OPT_LC, + OPT_LP, + OPT_PB, + OPT_MODE, + OPT_NICE, + OPT_MF, + OPT_DEPTH, +}; + + +static void lzma_attribute((__noreturn__)) +error_lzma_preset(const char *valuestr) +{ + message_fatal(_("Unsupported LZMA1/LZMA2 preset: %s"), valuestr); +} + + +static void +set_lzma(void *options, unsigned key, uint64_t value, const char *valuestr) +{ + lzma_options_lzma *opt = options; + + switch (key) { + case OPT_PRESET: { + if (valuestr[0] < '0' || valuestr[0] > '9') + error_lzma_preset(valuestr); + + uint32_t preset = (uint32_t)(valuestr[0] - '0'); + + // Currently only "e" is supported as a modifier, + // so keep this simple for now. + if (valuestr[1] != '\0') { + if (valuestr[1] == 'e') + preset |= LZMA_PRESET_EXTREME; + else + error_lzma_preset(valuestr); + + if (valuestr[2] != '\0') + error_lzma_preset(valuestr); + } + + if (lzma_lzma_preset(options, preset)) + error_lzma_preset(valuestr); + + break; + } + + case OPT_DICT: + opt->dict_size = value; + break; + + case OPT_LC: + opt->lc = value; + break; + + case OPT_LP: + opt->lp = value; + break; + + case OPT_PB: + opt->pb = value; + break; + + case OPT_MODE: + opt->mode = value; + break; + + case OPT_NICE: + opt->nice_len = value; + break; + + case OPT_MF: + opt->mf = value; + break; + + case OPT_DEPTH: + opt->depth = value; + break; + } +} + + +extern lzma_options_lzma * +options_lzma(const char *str) +{ + static const name_id_map modes[] = { + { "fast", LZMA_MODE_FAST }, + { "normal", LZMA_MODE_NORMAL }, + { NULL, 0 } + }; + + static const name_id_map mfs[] = { + { "hc3", LZMA_MF_HC3 }, + { "hc4", LZMA_MF_HC4 }, + { "bt2", LZMA_MF_BT2 }, + { "bt3", LZMA_MF_BT3 }, + { "bt4", LZMA_MF_BT4 }, + { NULL, 0 } + }; + + static const option_map opts[] = { + { "preset", NULL, UINT64_MAX, 0 }, + { "dict", NULL, LZMA_DICT_SIZE_MIN, + (UINT32_C(1) << 30) + (UINT32_C(1) << 29) }, + { "lc", NULL, LZMA_LCLP_MIN, LZMA_LCLP_MAX }, + { "lp", NULL, LZMA_LCLP_MIN, LZMA_LCLP_MAX }, + { "pb", NULL, LZMA_PB_MIN, LZMA_PB_MAX }, + { "mode", modes, 0, 0 }, + { "nice", NULL, 2, 273 }, + { "mf", mfs, 0, 0 }, + { "depth", NULL, 0, UINT32_MAX }, + { NULL, NULL, 0, 0 } + }; + + lzma_options_lzma *options = xmalloc(sizeof(lzma_options_lzma)); + if (lzma_lzma_preset(options, LZMA_PRESET_DEFAULT)) + message_bug(); + + parse_options(str, opts, &set_lzma, options); + + if (options->lc + options->lp > LZMA_LCLP_MAX) + message_fatal(_("The sum of lc and lp must not exceed 4")); + + return options; +} diff --git a/src/xz/options.h b/src/xz/options.h new file mode 100644 index 0000000..61ec8d5 --- /dev/null +++ b/src/xz/options.h @@ -0,0 +1,31 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file options.h +/// \brief Parser for filter-specific options +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Parser for Delta options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_delta *options_delta(const char *str); + + +/// \brief Parser for BCJ options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_bcj *options_bcj(const char *str); + + +/// \brief Parser for LZMA options +/// +/// \return Pointer to allocated options structure. +/// Doesn't return on error. +extern lzma_options_lzma *options_lzma(const char *str); diff --git a/src/xz/private.h b/src/xz/private.h new file mode 100644 index 0000000..6414bdb --- /dev/null +++ b/src/xz/private.h @@ -0,0 +1,66 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file private.h +/// \brief Common includes, definitions, and prototypes +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "sysdefs.h" +#include "mythread.h" + +#include "lzma.h" + +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <signal.h> +#include <locale.h> +#include <stdio.h> +#include <unistd.h> + +#include "tuklib_gettext.h" +#include "tuklib_progname.h" +#include "tuklib_exit.h" +#include "tuklib_mbstr.h" + +#if defined(_WIN32) && !defined(__CYGWIN__) +# define WIN32_LEAN_AND_MEAN +# include <windows.h> +#endif + +#ifndef STDIN_FILENO +# define STDIN_FILENO (fileno(stdin)) +#endif + +#ifndef STDOUT_FILENO +# define STDOUT_FILENO (fileno(stdout)) +#endif + +#ifndef STDERR_FILENO +# define STDERR_FILENO (fileno(stderr)) +#endif + +#if defined(HAVE_CAPSICUM) || defined(HAVE_PLEDGE) +# define ENABLE_SANDBOX 1 +#endif + +#include "main.h" +#include "mytime.h" +#include "coder.h" +#include "message.h" +#include "args.h" +#include "hardware.h" +#include "file_io.h" +#include "options.h" +#include "signals.h" +#include "suffix.h" +#include "util.h" + +#ifdef HAVE_DECODERS +# include "list.h" +#endif diff --git a/src/xz/signals.c b/src/xz/signals.c new file mode 100644 index 0000000..7aef463 --- /dev/null +++ b/src/xz/signals.c @@ -0,0 +1,209 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file signals.c +/// \brief Handling signals to abort operation +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + + +volatile sig_atomic_t user_abort = false; + + +#if !(defined(_WIN32) && !defined(__CYGWIN__)) + +/// If we were interrupted by a signal, we store the signal number so that +/// we can raise that signal to kill the program when all cleanups have +/// been done. +static volatile sig_atomic_t exit_signal = 0; + +/// Mask of signals for which we have established a signal handler to set +/// user_abort to true. +static sigset_t hooked_signals; + +/// True once signals_init() has finished. This is used to skip blocking +/// signals (with uninitialized hooked_signals) if signals_block() and +/// signals_unblock() are called before signals_init() has been called. +static bool signals_are_initialized = false; + +/// signals_block() and signals_unblock() can be called recursively. +static size_t signals_block_count = 0; + + +static void +signal_handler(int sig) +{ + exit_signal = sig; + user_abort = true; + +#ifndef TUKLIB_DOSLIKE + io_write_to_user_abort_pipe(); +#endif + + return; +} + + +extern void +signals_init(void) +{ + // List of signals for which we establish the signal handler. + static const int sigs[] = { + SIGINT, + SIGTERM, +#ifdef SIGHUP + SIGHUP, +#endif +#ifdef SIGPIPE + SIGPIPE, +#endif +#ifdef SIGXCPU + SIGXCPU, +#endif +#ifdef SIGXFSZ + SIGXFSZ, +#endif + }; + + // Mask of the signals for which we have established a signal handler. + sigemptyset(&hooked_signals); + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) + sigaddset(&hooked_signals, sigs[i]); + +#ifdef SIGALRM + // Add also the signals from message.c to hooked_signals. + for (size_t i = 0; message_progress_sigs[i] != 0; ++i) + sigaddset(&hooked_signals, message_progress_sigs[i]); +#endif + + // Using "my_sa" because "sa" may conflict with a sockaddr variable + // from system headers on Solaris. + struct sigaction my_sa; + + // All the signals that we handle we also blocked while the signal + // handler runs. + my_sa.sa_mask = hooked_signals; + + // Don't set SA_RESTART, because we want EINTR so that we can check + // for user_abort and cleanup before exiting. We block the signals + // for which we have established a handler when we don't want EINTR. + my_sa.sa_flags = 0; + my_sa.sa_handler = &signal_handler; + + for (size_t i = 0; i < ARRAY_SIZE(sigs); ++i) { + // If the parent process has left some signals ignored, + // we don't unignore them. + struct sigaction old; + if (sigaction(sigs[i], NULL, &old) == 0 + && old.sa_handler == SIG_IGN) + continue; + + // Establish the signal handler. + if (sigaction(sigs[i], &my_sa, NULL)) + message_signal_handler(); + } + + signals_are_initialized = true; + + return; +} + + +#ifndef __VMS +extern void +signals_block(void) +{ + if (signals_are_initialized) { + if (signals_block_count++ == 0) { + const int saved_errno = errno; + mythread_sigmask(SIG_BLOCK, &hooked_signals, NULL); + errno = saved_errno; + } + } + + return; +} + + +extern void +signals_unblock(void) +{ + if (signals_are_initialized) { + assert(signals_block_count > 0); + + if (--signals_block_count == 0) { + const int saved_errno = errno; + mythread_sigmask(SIG_UNBLOCK, &hooked_signals, NULL); + errno = saved_errno; + } + } + + return; +} +#endif + + +extern void +signals_exit(void) +{ + const int sig = (int)exit_signal; + + if (sig != 0) { +#if defined(TUKLIB_DOSLIKE) || defined(__VMS) + // Don't raise(), set only exit status. This avoids + // printing unwanted message about SIGINT when the user + // presses C-c. + set_exit_status(E_ERROR); +#else + struct sigaction sa; + sa.sa_handler = SIG_DFL; + sigfillset(&sa.sa_mask); + sa.sa_flags = 0; + sigaction(sig, &sa, NULL); + raise(sig); +#endif + } + + return; +} + +#else + +// While Windows has some very basic signal handling functions as required +// by C89, they are not really used, and e.g. SIGINT doesn't work exactly +// the way it does on POSIX (Windows creates a new thread for the signal +// handler). Instead, we use SetConsoleCtrlHandler() to catch user +// pressing C-c, because that seems to be the recommended way to do it. +// +// NOTE: This doesn't work under MSYS. Trying with SIGINT doesn't work +// either even if it appeared to work at first. So test using Windows +// console window. + +static BOOL WINAPI +signal_handler(DWORD type lzma_attribute((__unused__))) +{ + // Since we don't get a signal number which we could raise() at + // signals_exit() like on POSIX, just set the exit status to + // indicate an error, so that we cannot return with zero exit status. + set_exit_status(E_ERROR); + user_abort = true; + return TRUE; +} + + +extern void +signals_init(void) +{ + if (!SetConsoleCtrlHandler(&signal_handler, TRUE)) + message_signal_handler(); + + return; +} + +#endif diff --git a/src/xz/signals.h b/src/xz/signals.h new file mode 100644 index 0000000..5b125e0 --- /dev/null +++ b/src/xz/signals.h @@ -0,0 +1,43 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file signals.h +/// \brief Handling signals to abort operation +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// If this is true, we will clean up the possibly incomplete output file, +/// return to main() as soon as practical. That is, the code needs to poll +/// this variable in various places. +extern volatile sig_atomic_t user_abort; + + +/// Initialize the signal handler, which will set user_abort to true when +/// user e.g. presses C-c. +extern void signals_init(void); + + +#if (defined(_WIN32) && !defined(__CYGWIN__)) || defined(__VMS) +# define signals_block() do { } while (0) +# define signals_unblock() do { } while (0) +#else +/// Block the signals which don't have SA_RESTART and which would just set +/// user_abort to true. This is handy when we don't want to handle EINTR +/// and don't want SA_RESTART either. +extern void signals_block(void); + +/// Unblock the signals blocked by signals_block(). +extern void signals_unblock(void); +#endif + +#if defined(_WIN32) && !defined(__CYGWIN__) +# define signals_exit() do { } while (0) +#else +/// If user has sent us a signal earlier to terminate the process, +/// re-raise that signal to actually terminate the process. +extern void signals_exit(void); +#endif diff --git a/src/xz/suffix.c b/src/xz/suffix.c new file mode 100644 index 0000000..09add38 --- /dev/null +++ b/src/xz/suffix.c @@ -0,0 +1,411 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file suffix.c +/// \brief Checks filename suffix and creates the destination filename +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" + +#ifdef __DJGPP__ +# include <fcntl.h> +#endif + +// For case-insensitive filename suffix on case-insensitive systems +#if defined(TUKLIB_DOSLIKE) || defined(__VMS) +# ifdef HAVE_STRINGS_H +# include <strings.h> +# endif +# define strcmp strcasecmp +#endif + + +static char *custom_suffix = NULL; + + +/// \brief Test if the char is a directory separator +static bool +is_dir_sep(char c) +{ +#ifdef TUKLIB_DOSLIKE + return c == '/' || c == '\\' || c == ':'; +#else + return c == '/'; +#endif +} + + +/// \brief Test if the string contains a directory separator +static bool +has_dir_sep(const char *str) +{ +#ifdef TUKLIB_DOSLIKE + return strpbrk(str, "/\\:") != NULL; +#else + return strchr(str, '/') != NULL; +#endif +} + + +#ifdef __DJGPP__ +/// \brief Test for special suffix used for 8.3 short filenames (SFN) +/// +/// \return If str matches *.?- or *.??-, true is returned. Otherwise +/// false is returned. +static bool +has_sfn_suffix(const char *str, size_t len) +{ + if (len >= 4 && str[len - 1] == '-' && str[len - 2] != '.' + && !is_dir_sep(str[len - 2])) { + // *.?- + if (str[len - 3] == '.') + return !is_dir_sep(str[len - 4]); + + // *.??- + if (len >= 5 && !is_dir_sep(str[len - 3]) + && str[len - 4] == '.') + return !is_dir_sep(str[len - 5]); + } + + return false; +} +#endif + + +/// \brief Checks if src_name has given compressed_suffix +/// +/// \param suffix Filename suffix to look for +/// \param src_name Input filename +/// \param src_len strlen(src_name) +/// +/// \return If src_name has the suffix, src_len - strlen(suffix) is +/// returned. It's always a positive integer. Otherwise zero +/// is returned. +static size_t +test_suffix(const char *suffix, const char *src_name, size_t src_len) +{ + const size_t suffix_len = strlen(suffix); + + // The filename must have at least one character in addition to + // the suffix. src_name may contain path to the filename, so we + // need to check for directory separator too. + if (src_len <= suffix_len + || is_dir_sep(src_name[src_len - suffix_len - 1])) + return 0; + + if (strcmp(suffix, src_name + src_len - suffix_len) == 0) + return src_len - suffix_len; + + return 0; +} + + +/// \brief Removes the filename suffix of the compressed file +/// +/// \return Name of the uncompressed file, or NULL if file has unknown +/// suffix. +static char * +uncompressed_name(const char *src_name, const size_t src_len) +{ + static const struct { + const char *compressed; + const char *uncompressed; + } suffixes[] = { + { ".xz", "" }, + { ".txz", ".tar" }, // .txz abbreviation for .txt.gz is rare. + { ".lzma", "" }, +#ifdef __DJGPP__ + { ".lzm", "" }, +#endif + { ".tlz", ".tar" }, // Both .tar.lzma and .tar.lz +#ifdef HAVE_LZIP_DECODER + { ".lz", "" }, +#endif + }; + + const char *new_suffix = ""; + size_t new_len = 0; + + if (opt_format == FORMAT_RAW) { + // Don't check for known suffixes when --format=raw was used. + if (custom_suffix == NULL) { + message_error(_("%s: With --format=raw, " + "--suffix=.SUF is required unless " + "writing to stdout"), src_name); + return NULL; + } + } else { + for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) { + new_len = test_suffix(suffixes[i].compressed, + src_name, src_len); + if (new_len != 0) { + new_suffix = suffixes[i].uncompressed; + break; + } + } + +#ifdef __DJGPP__ + // Support also *.?- -> *.? and *.??- -> *.?? on DOS. + // This is done also when long filenames are available + // to keep it easy to decompress files created when + // long filename support wasn't available. + if (new_len == 0 && has_sfn_suffix(src_name, src_len)) { + new_suffix = ""; + new_len = src_len - 1; + } +#endif + } + + if (new_len == 0 && custom_suffix != NULL) + new_len = test_suffix(custom_suffix, src_name, src_len); + + if (new_len == 0) { + message_warning(_("%s: Filename has an unknown suffix, " + "skipping"), src_name); + return NULL; + } + + const size_t new_suffix_len = strlen(new_suffix); + char *dest_name = xmalloc(new_len + new_suffix_len + 1); + + memcpy(dest_name, src_name, new_len); + memcpy(dest_name + new_len, new_suffix, new_suffix_len); + dest_name[new_len + new_suffix_len] = '\0'; + + return dest_name; +} + + +/// This message is needed in multiple places in compressed_name(), +/// so the message has been put into its own function. +static void +msg_suffix(const char *src_name, const char *suffix) +{ + message_warning(_("%s: File already has `%s' suffix, skipping"), + src_name, suffix); + return; +} + + +/// \brief Appends suffix to src_name +/// +/// In contrast to uncompressed_name(), we check only suffixes that are valid +/// for the specified file format. +static char * +compressed_name(const char *src_name, size_t src_len) +{ + // The order of these must match the order in args.h. + static const char *const all_suffixes[][4] = { + { + ".xz", + ".txz", + NULL + }, { + ".lzma", +#ifdef __DJGPP__ + ".lzm", +#endif + ".tlz", + NULL +#ifdef HAVE_LZIP_DECODER + // This is needed to keep the table indexing in sync with + // enum format_type from coder.h. + }, { +/* + ".lz", +*/ + NULL +#endif + }, { + // --format=raw requires specifying the suffix + // manually or using stdout. + NULL + } + }; + + // args.c ensures these. + assert(opt_format != FORMAT_AUTO); +#ifdef HAVE_LZIP_DECODER + assert(opt_format != FORMAT_LZIP); +#endif + + const size_t format = opt_format - 1; + const char *const *suffixes = all_suffixes[format]; + + // Look for known filename suffixes and refuse to compress them. + for (size_t i = 0; suffixes[i] != NULL; ++i) { + if (test_suffix(suffixes[i], src_name, src_len) != 0) { + msg_suffix(src_name, suffixes[i]); + return NULL; + } + } + +#ifdef __DJGPP__ + // Recognize also the special suffix that is used when long + // filename (LFN) support isn't available. This suffix is + // recognized on LFN systems too. + if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) { + msg_suffix(src_name, "-"); + return NULL; + } +#endif + + if (custom_suffix != NULL) { + if (test_suffix(custom_suffix, src_name, src_len) != 0) { + msg_suffix(src_name, custom_suffix); + return NULL; + } + } + + // TODO: Hmm, maybe it would be better to validate this in args.c, + // since the suffix handling when decoding is weird now. + if (opt_format == FORMAT_RAW && custom_suffix == NULL) { + message_error(_("%s: With --format=raw, " + "--suffix=.SUF is required unless " + "writing to stdout"), src_name); + return NULL; + } + + const char *suffix = custom_suffix != NULL + ? custom_suffix : suffixes[0]; + size_t suffix_len = strlen(suffix); + +#ifdef __DJGPP__ + if (!_use_lfn(src_name)) { + // Long filename (LFN) support isn't available and we are + // limited to 8.3 short filenames (SFN). + // + // Look for suffix separator from the filename, and make sure + // that it is in the filename, not in a directory name. + const char *sufsep = strrchr(src_name, '.'); + if (sufsep == NULL || sufsep[1] == '\0' + || has_dir_sep(sufsep)) { + // src_name has no filename extension. + // + // Examples: + // xz foo -> foo.xz + // xz -F lzma foo -> foo.lzm + // xz -S x foo -> foox + // xz -S x foo. -> foo.x + // xz -S x.y foo -> foox.y + // xz -S .x foo -> foo.x + // xz -S .x foo. -> foo.x + // + // Avoid double dots: + if (sufsep != NULL && sufsep[1] == '\0' + && suffix[0] == '.') + --src_len; + + } else if (custom_suffix == NULL + && strcasecmp(sufsep, ".tar") == 0) { + // ".tar" is handled specially. + // + // Examples: + // xz foo.tar -> foo.txz + // xz -F lzma foo.tar -> foo.tlz + static const char *const tar_suffixes[] = { + ".txz", // .tar.xz + ".tlz", // .tar.lzma +/* + ".tlz", // .tar.lz +*/ + }; + suffix = tar_suffixes[format]; + suffix_len = 4; + src_len -= 4; + + } else { + if (custom_suffix == NULL && opt_format == FORMAT_XZ) { + // Instead of the .xz suffix, use a single + // character at the end of the filename + // extension. This is to minimize name + // conflicts when compressing multiple files + // with the same basename. E.g. foo.txt and + // foo.exe become foo.tx- and foo.ex-. Dash + // is rare as the last character of the + // filename extension, so it seems to be + // quite safe choice and it stands out better + // in directory listings than e.g. x. For + // comparison, gzip uses z. + suffix = "-"; + suffix_len = 1; + } + + if (suffix[0] == '.') { + // The first character of the suffix is a dot. + // Throw away the original filename extension + // and replace it with the new suffix. + // + // Examples: + // xz -F lzma foo.txt -> foo.lzm + // xz -S .x foo.txt -> foo.x + src_len = sufsep - src_name; + + } else { + // The first character of the suffix is not + // a dot. Preserve the first 0-2 characters + // of the original filename extension. + // + // Examples: + // xz foo.txt -> foo.tx- + // xz -S x foo.c -> foo.cx + // xz -S ab foo.c -> foo.cab + // xz -S ab foo.txt -> foo.tab + // xz -S abc foo.txt -> foo.abc + // + // Truncate the suffix to three chars: + if (suffix_len > 3) + suffix_len = 3; + + // If needed, overwrite 1-3 characters. + if (strlen(sufsep) > 4 - suffix_len) + src_len = sufsep - src_name + + 4 - suffix_len; + } + } + } +#endif + + char *dest_name = xmalloc(src_len + suffix_len + 1); + + memcpy(dest_name, src_name, src_len); + memcpy(dest_name + src_len, suffix, suffix_len); + dest_name[src_len + suffix_len] = '\0'; + + return dest_name; +} + + +extern char * +suffix_get_dest_name(const char *src_name) +{ + assert(src_name != NULL); + + // Length of the name is needed in all cases to locate the end of + // the string to compare the suffix, so calculate the length here. + const size_t src_len = strlen(src_name); + + return opt_mode == MODE_COMPRESS + ? compressed_name(src_name, src_len) + : uncompressed_name(src_name, src_len); +} + + +extern void +suffix_set(const char *suffix) +{ + // Empty suffix and suffixes having a directory separator are + // rejected. Such suffixes would break things later. + if (suffix[0] == '\0' || has_dir_sep(suffix)) + message_fatal(_("%s: Invalid filename suffix"), suffix); + + // Replace the old custom_suffix (if any) with the new suffix. + free(custom_suffix); + custom_suffix = xstrdup(suffix); + return; +} diff --git a/src/xz/suffix.h b/src/xz/suffix.h new file mode 100644 index 0000000..5537d73 --- /dev/null +++ b/src/xz/suffix.h @@ -0,0 +1,28 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file suffix.h +/// \brief Checks filename suffix and creates the destination filename +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Get the name of the destination file +/// +/// Depending on the global variable opt_mode, this tries to find a matching +/// counterpart for src_name. If the name can be constructed, it is allocated +/// and returned (caller must free it). On error, a message is printed and +/// NULL is returned. +extern char *suffix_get_dest_name(const char *src_name); + + +/// \brief Set a custom filename suffix +/// +/// This function calls xstrdup() for the given suffix, thus the caller +/// doesn't need to keep the memory allocated. There can be only one custom +/// suffix, thus if this is called multiple times, the old suffixes are freed +/// and forgotten. +extern void suffix_set(const char *suffix); diff --git a/src/xz/util.c b/src/xz/util.c new file mode 100644 index 0000000..9f9a8fb --- /dev/null +++ b/src/xz/util.c @@ -0,0 +1,286 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file util.c +/// \brief Miscellaneous utility functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include "private.h" +#include <stdarg.h> + + +/// Buffers for uint64_to_str() and uint64_to_nicestr() +static char bufs[4][128]; + +/// Thousand separator support in uint64_to_str() and uint64_to_nicestr() +static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN; + + +extern void * +xrealloc(void *ptr, size_t size) +{ + assert(size > 0); + + // Save ptr so that we can free it if realloc fails. + // The point is that message_fatal ends up calling stdio functions + // which in some libc implementations might allocate memory from + // the heap. Freeing ptr improves the chances that there's free + // memory for stdio functions if they need it. + void *p = ptr; + ptr = realloc(ptr, size); + + if (ptr == NULL) { + const int saved_errno = errno; + free(p); + message_fatal("%s", strerror(saved_errno)); + } + + return ptr; +} + + +extern char * +xstrdup(const char *src) +{ + assert(src != NULL); + const size_t size = strlen(src) + 1; + char *dest = xmalloc(size); + return memcpy(dest, src, size); +} + + +extern uint64_t +str_to_uint64(const char *name, const char *value, uint64_t min, uint64_t max) +{ + uint64_t result = 0; + + // Skip blanks. + while (*value == ' ' || *value == '\t') + ++value; + + // Accept special value "max". Supporting "min" doesn't seem useful. + if (strcmp(value, "max") == 0) + return max; + + if (*value < '0' || *value > '9') + message_fatal(_("%s: Value is not a non-negative " + "decimal integer"), value); + + do { + // Don't overflow. + if (result > UINT64_MAX / 10) + goto error; + + result *= 10; + + // Another overflow check + const uint32_t add = (uint32_t)(*value - '0'); + if (UINT64_MAX - add < result) + goto error; + + result += add; + ++value; + } while (*value >= '0' && *value <= '9'); + + if (*value != '\0') { + // Look for suffix. Originally this supported both base-2 + // and base-10, but since there seems to be little need + // for base-10 in this program, treat everything as base-2 + // and also be more relaxed about the case of the first + // letter of the suffix. + uint64_t multiplier = 0; + if (*value == 'k' || *value == 'K') + multiplier = UINT64_C(1) << 10; + else if (*value == 'm' || *value == 'M') + multiplier = UINT64_C(1) << 20; + else if (*value == 'g' || *value == 'G') + multiplier = UINT64_C(1) << 30; + + ++value; + + // Allow also e.g. Ki, KiB, and KB. + if (*value != '\0' && strcmp(value, "i") != 0 + && strcmp(value, "iB") != 0 + && strcmp(value, "B") != 0) + multiplier = 0; + + if (multiplier == 0) { + message(V_ERROR, _("%s: Invalid multiplier suffix"), + value - 1); + message_fatal(_("Valid suffixes are `KiB' (2^10), " + "`MiB' (2^20), and `GiB' (2^30).")); + } + + // Don't overflow here either. + if (result > UINT64_MAX / multiplier) + goto error; + + result *= multiplier; + } + + if (result < min || result > max) + goto error; + + return result; + +error: + message_fatal(_("Value of the option `%s' must be in the range " + "[%" PRIu64 ", %" PRIu64 "]"), + name, min, max); +} + + +extern uint64_t +round_up_to_mib(uint64_t n) +{ + return (n >> 20) + ((n & ((UINT32_C(1) << 20) - 1)) != 0); +} + + +/// Check if thousands separator is supported. Run-time checking is easiest +/// because it seems to be sometimes lacking even on a POSIXish system. +/// Note that trying to use thousands separators when snprintf() doesn't +/// support them results in undefined behavior. This just has happened to +/// work well enough in practice. +/// +/// DJGPP 2.05 added support for thousands separators but it's broken +/// at least under WinXP with Finnish locale that uses a non-breaking space +/// as the thousands separator. Workaround by disabling thousands separators +/// for DJGPP builds. +static void +check_thousand_sep(uint32_t slot) +{ + if (thousand == UNKNOWN) { + bufs[slot][0] = '\0'; +#ifndef __DJGPP__ + snprintf(bufs[slot], sizeof(bufs[slot]), "%'u", 1U); +#endif + thousand = bufs[slot][0] == '1' ? WORKS : BROKEN; + } + + return; +} + + +extern const char * +uint64_to_str(uint64_t value, uint32_t slot) +{ + assert(slot < ARRAY_SIZE(bufs)); + + check_thousand_sep(slot); + + if (thousand == WORKS) + snprintf(bufs[slot], sizeof(bufs[slot]), "%'" PRIu64, value); + else + snprintf(bufs[slot], sizeof(bufs[slot]), "%" PRIu64, value); + + return bufs[slot]; +} + + +extern const char * +uint64_to_nicestr(uint64_t value, enum nicestr_unit unit_min, + enum nicestr_unit unit_max, bool always_also_bytes, + uint32_t slot) +{ + assert(unit_min <= unit_max); + assert(unit_max <= NICESTR_TIB); + assert(slot < ARRAY_SIZE(bufs)); + + check_thousand_sep(slot); + + enum nicestr_unit unit = NICESTR_B; + char *pos = bufs[slot]; + size_t left = sizeof(bufs[slot]); + + if ((unit_min == NICESTR_B && value < 10000) + || unit_max == NICESTR_B) { + // The value is shown as bytes. + if (thousand == WORKS) + my_snprintf(&pos, &left, "%'u", (unsigned int)value); + else + my_snprintf(&pos, &left, "%u", (unsigned int)value); + } else { + // Scale the value to a nicer unit. Unless unit_min and + // unit_max limit us, we will show at most five significant + // digits with one decimal place. + double d = (double)(value); + do { + d /= 1024.0; + ++unit; + } while (unit < unit_min || (d > 9999.9 && unit < unit_max)); + + if (thousand == WORKS) + my_snprintf(&pos, &left, "%'.1f", d); + else + my_snprintf(&pos, &left, "%.1f", d); + } + + static const char suffix[5][4] = { "B", "KiB", "MiB", "GiB", "TiB" }; + my_snprintf(&pos, &left, " %s", suffix[unit]); + + if (always_also_bytes && value >= 10000) { + if (thousand == WORKS) + snprintf(pos, left, " (%'" PRIu64 " B)", value); + else + snprintf(pos, left, " (%" PRIu64 " B)", value); + } + + return bufs[slot]; +} + + +extern void +my_snprintf(char **pos, size_t *left, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + const int len = vsnprintf(*pos, *left, fmt, ap); + va_end(ap); + + // If an error occurred, we want the caller to think that the whole + // buffer was used. This way no more data will be written to the + // buffer. We don't need better error handling here, although it + // is possible that the result looks garbage on the terminal if + // e.g. an UTF-8 character gets split. That shouldn't (easily) + // happen though, because the buffers used have some extra room. + if (len < 0 || (size_t)(len) >= *left) { + *left = 0; + } else { + *pos += len; + *left -= (size_t)(len); + } + + return; +} + + +extern bool +is_tty_stdin(void) +{ + const bool ret = isatty(STDIN_FILENO); + + if (ret) + message_error(_("Compressed data cannot be read from " + "a terminal")); + + return ret; +} + + +extern bool +is_tty_stdout(void) +{ + const bool ret = isatty(STDOUT_FILENO); + + if (ret) + message_error(_("Compressed data cannot be written to " + "a terminal")); + + return ret; +} diff --git a/src/xz/util.h b/src/xz/util.h new file mode 100644 index 0000000..4a536f5 --- /dev/null +++ b/src/xz/util.h @@ -0,0 +1,119 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file util.h +/// \brief Miscellaneous utility functions +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +/// \brief Safe malloc() that never returns NULL +/// +/// \note xmalloc(), xrealloc(), and xstrdup() must not be used when +/// there are files open for writing, that should be cleaned up +/// before exiting. +#define xmalloc(size) xrealloc(NULL, size) + + +/// \brief Safe realloc() that never returns NULL +extern void *xrealloc(void *ptr, size_t size) + lzma_attribute((__malloc__)) lzma_attr_alloc_size(2); + + +/// \brief Safe strdup() that never returns NULL +extern char *xstrdup(const char *src) lzma_attribute((__malloc__)); + + +/// \brief Fancy version of strtoull() +/// +/// \param name Name of the option to show in case of an error +/// \param value String containing the number to be parsed; may +/// contain suffixes "k", "M", "G", "Ki", "Mi", or "Gi" +/// \param min Minimum valid value +/// \param max Maximum valid value +/// +/// \return Parsed value that is in the range [min, max]. Does not return +/// if an error occurs. +/// +extern uint64_t str_to_uint64(const char *name, const char *value, + uint64_t min, uint64_t max); + + +/// \brief Round an integer up to the next full MiB and convert to MiB +/// +/// This is used when printing memory usage and limit. +extern uint64_t round_up_to_mib(uint64_t n); + + +/// \brief Convert uint64_t to a string +/// +/// Convert the given value to a string with locale-specific thousand +/// separators, if supported by the snprintf() implementation. The string +/// is stored into an internal static buffer indicated by the slot argument. +/// A pointer to the selected buffer is returned. +/// +/// This function exists, because non-POSIX systems don't support thousand +/// separator in format strings. Solving the problem in a simple way doesn't +/// work, because it breaks gettext (specifically, the xgettext tool). +extern const char *uint64_to_str(uint64_t value, uint32_t slot); + + +enum nicestr_unit { + NICESTR_B, + NICESTR_KIB, + NICESTR_MIB, + NICESTR_GIB, + NICESTR_TIB, +}; + + +/// \brief Convert uint64_t to a nice human readable string +/// +/// This is like uint64_to_str() but uses B, KiB, MiB, GiB, or TiB suffix +/// and optionally includes the exact size in parenthesis. +/// +/// \param value Value to be printed +/// \param unit_min Smallest unit to use. This and unit_max are used +/// e.g. when showing the progress indicator to force +/// the unit to MiB. +/// \param unit_max Biggest unit to use. assert(unit_min <= unit_max). +/// \param always_also_bytes +/// Show also the exact byte value in parenthesis +/// if the nicely formatted string uses bigger unit +/// than bytes. +/// \param slot Which static buffer to use to hold the string. +/// This is shared with uint64_to_str(). +/// +/// \return Pointer to statically allocated buffer containing the string. +/// +/// \note This uses double_to_str() internally so the static buffer +/// in double_to_str() will be overwritten. +/// +extern const char *uint64_to_nicestr(uint64_t value, + enum nicestr_unit unit_min, enum nicestr_unit unit_max, + bool always_also_bytes, uint32_t slot); + + +/// \brief Wrapper for snprintf() to help constructing a string in pieces +/// +/// A maximum of *left bytes is written starting from *pos. *pos and *left +/// are updated accordingly. +extern void my_snprintf(char **pos, size_t *left, const char *fmt, ...) + lzma_attribute((__format__(__printf__, 3, 4))); + + +/// \brief Test if stdin is a terminal +/// +/// If stdin is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. +extern bool is_tty_stdin(void); + + +/// \brief Test if stdout is a terminal +/// +/// If stdout is a terminal, an error message is printed and exit status set +/// to EXIT_ERROR. +extern bool is_tty_stdout(void); diff --git a/src/xz/xz.1 b/src/xz/xz.1 new file mode 100644 index 0000000..caa5a06 --- /dev/null +++ b/src/xz/xz.1 @@ -0,0 +1,3020 @@ +'\" t +.\" +.\" Author: Lasse Collin +.\" +.\" This file has been put into the public domain. +.\" You can do whatever you want with this file. +.\" +.TH XZ 1 "2022-12-01" "Tukaani" "XZ Utils" +. +.SH NAME +xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files +. +.SH SYNOPSIS +.B xz +.RI [ option... ] +.RI [ file... ] +. +.SH COMMAND ALIASES +.B unxz +is equivalent to +.BR "xz \-\-decompress" . +.br +.B xzcat +is equivalent to +.BR "xz \-\-decompress \-\-stdout" . +.br +.B lzma +is equivalent to +.BR "xz \-\-format=lzma" . +.br +.B unlzma +is equivalent to +.BR "xz \-\-format=lzma \-\-decompress" . +.br +.B lzcat +is equivalent to +.BR "xz \-\-format=lzma \-\-decompress \-\-stdout" . +.PP +When writing scripts that need to decompress files, +it is recommended to always use the name +.B xz +with appropriate arguments +.RB ( "xz \-d" +or +.BR "xz \-dc" ) +instead of the names +.B unxz +and +.BR xzcat . +. +.SH DESCRIPTION +.B xz +is a general-purpose data compression tool with +command line syntax similar to +.BR gzip (1) +and +.BR bzip2 (1). +The native file format is the +.B .xz +format, but the legacy +.B .lzma +format used by LZMA Utils and +raw compressed streams with no container format headers +are also supported. +In addition, decompression of the +.B .lz +format used by +.B lzip +is supported. +.PP +.B xz +compresses or decompresses each +.I file +according to the selected operation mode. +If no +.I files +are given or +.I file +is +.BR \- , +.B xz +reads from standard input and writes the processed data +to standard output. +.B xz +will refuse (display an error and skip the +.IR file ) +to write compressed data to standard output if it is a terminal. +Similarly, +.B xz +will refuse to read compressed data +from standard input if it is a terminal. +.PP +Unless +.B \-\-stdout +is specified, +.I files +other than +.B \- +are written to a new file whose name is derived from the source +.I file +name: +.IP \(bu 3 +When compressing, the suffix of the target file format +.RB ( .xz +or +.BR .lzma ) +is appended to the source filename to get the target filename. +.IP \(bu 3 +When decompressing, the +.BR .xz , +.BR .lzma , +or +.B .lz +suffix is removed from the filename to get the target filename. +.B xz +also recognizes the suffixes +.B .txz +and +.BR .tlz , +and replaces them with the +.B .tar +suffix. +.PP +If the target file already exists, an error is displayed and the +.I file +is skipped. +.PP +Unless writing to standard output, +.B xz +will display a warning and skip the +.I file +if any of the following applies: +.IP \(bu 3 +.I File +is not a regular file. +Symbolic links are not followed, +and thus they are not considered to be regular files. +.IP \(bu 3 +.I File +has more than one hard link. +.IP \(bu 3 +.I File +has setuid, setgid, or sticky bit set. +.IP \(bu 3 +The operation mode is set to compress and the +.I file +already has a suffix of the target file format +.RB ( .xz +or +.B .txz +when compressing to the +.B .xz +format, and +.B .lzma +or +.B .tlz +when compressing to the +.B .lzma +format). +.IP \(bu 3 +The operation mode is set to decompress and the +.I file +doesn't have a suffix of any of the supported file formats +.RB ( .xz , +.BR .txz , +.BR .lzma , +.BR .tlz , +or +.BR .lz ). +.PP +After successfully compressing or decompressing the +.IR file , +.B xz +copies the owner, group, permissions, access time, +and modification time from the source +.I file +to the target file. +If copying the group fails, the permissions are modified +so that the target file doesn't become accessible to users +who didn't have permission to access the source +.IR file . +.B xz +doesn't support copying other metadata like access control lists +or extended attributes yet. +.PP +Once the target file has been successfully closed, the source +.I file +is removed unless +.B \-\-keep +was specified. +The source +.I file +is never removed if the output is written to standard output +or if an error occurs. +.PP +Sending +.B SIGINFO +or +.B SIGUSR1 +to the +.B xz +process makes it print progress information to standard error. +This has only limited use since when standard error +is a terminal, using +.B \-\-verbose +will display an automatically updating progress indicator. +. +.SS "Memory usage" +The memory usage of +.B xz +varies from a few hundred kilobytes to several gigabytes +depending on the compression settings. +The settings used when compressing a file determine +the memory requirements of the decompressor. +Typically the decompressor needs 5\ % to 20\ % of +the amount of memory that the compressor needed when +creating the file. +For example, decompressing a file created with +.B xz \-9 +currently requires 65\ MiB of memory. +Still, it is possible to have +.B .xz +files that require several gigabytes of memory to decompress. +.PP +Especially users of older systems may find +the possibility of very large memory usage annoying. +To prevent uncomfortable surprises, +.B xz +has a built-in memory usage limiter, which is disabled by default. +While some operating systems provide ways to limit +the memory usage of processes, relying on it +wasn't deemed to be flexible enough (for example, using +.BR ulimit (1) +to limit virtual memory tends to cripple +.BR mmap (2)). +.PP +The memory usage limiter can be enabled with +the command line option \fB\-\-memlimit=\fIlimit\fR. +Often it is more convenient to enable the limiter +by default by setting the environment variable +.BR XZ_DEFAULTS , +for example, +.BR XZ_DEFAULTS=\-\-memlimit=150MiB . +It is possible to set the limits separately +for compression and decompression by using +.BI \-\-memlimit\-compress= limit +and \fB\-\-memlimit\-decompress=\fIlimit\fR. +Using these two options outside +.B XZ_DEFAULTS +is rarely useful because a single run of +.B xz +cannot do both compression and decompression and +.BI \-\-memlimit= limit +(or +.B \-M +.IR limit ) +is shorter to type on the command line. +.PP +If the specified memory usage limit is exceeded when decompressing, +.B xz +will display an error and decompressing the file will fail. +If the limit is exceeded when compressing, +.B xz +will try to scale the settings down so that the limit +is no longer exceeded (except when using +.B \-\-format=raw +or +.BR \-\-no\-adjust ). +This way the operation won't fail unless the limit is very small. +The scaling of the settings is done in steps that don't +match the compression level presets, for example, if the limit is +only slightly less than the amount required for +.BR "xz \-9" , +the settings will be scaled down only a little, +not all the way down to +.BR "xz \-8" . +. +.SS "Concatenation and padding with .xz files" +It is possible to concatenate +.B .xz +files as is. +.B xz +will decompress such files as if they were a single +.B .xz +file. +.PP +It is possible to insert padding between the concatenated parts +or after the last part. +The padding must consist of null bytes and the size +of the padding must be a multiple of four bytes. +This can be useful, for example, if the +.B .xz +file is stored on a medium that measures file sizes +in 512-byte blocks. +.PP +Concatenation and padding are not allowed with +.B .lzma +files or raw streams. +. +.SH OPTIONS +. +.SS "Integer suffixes and special values" +In most places where an integer argument is expected, +an optional suffix is supported to easily indicate large integers. +There must be no space between the integer and the suffix. +.TP +.B KiB +Multiply the integer by 1,024 (2^10). +.BR Ki , +.BR k , +.BR kB , +.BR K , +and +.B KB +are accepted as synonyms for +.BR KiB . +.TP +.B MiB +Multiply the integer by 1,048,576 (2^20). +.BR Mi , +.BR m , +.BR M , +and +.B MB +are accepted as synonyms for +.BR MiB . +.TP +.B GiB +Multiply the integer by 1,073,741,824 (2^30). +.BR Gi , +.BR g , +.BR G , +and +.B GB +are accepted as synonyms for +.BR GiB . +.PP +The special value +.B max +can be used to indicate the maximum integer value +supported by the option. +. +.SS "Operation mode" +If multiple operation mode options are given, +the last one takes effect. +.TP +.BR \-z ", " \-\-compress +Compress. +This is the default operation mode when no operation mode option +is specified and no other operation mode is implied from +the command name (for example, +.B unxz +implies +.BR \-\-decompress ). +.TP +.BR \-d ", " \-\-decompress ", " \-\-uncompress +Decompress. +.TP +.BR \-t ", " \-\-test +Test the integrity of compressed +.IR files . +This option is equivalent to +.B "\-\-decompress \-\-stdout" +except that the decompressed data is discarded instead of being +written to standard output. +No files are created or removed. +.TP +.BR \-l ", " \-\-list +Print information about compressed +.IR files . +No uncompressed output is produced, +and no files are created or removed. +In list mode, the program cannot read +the compressed data from standard +input or from other unseekable sources. +.IP "" +The default listing shows basic information about +.IR files , +one file per line. +To get more detailed information, use also the +.B \-\-verbose +option. +For even more information, use +.B \-\-verbose +twice, but note that this may be slow, because getting all the extra +information requires many seeks. +The width of verbose output exceeds +80 characters, so piping the output to, for example, +.B "less\ \-S" +may be convenient if the terminal isn't wide enough. +.IP "" +The exact output may vary between +.B xz +versions and different locales. +For machine-readable output, +.B \-\-robot \-\-list +should be used. +. +.SS "Operation modifiers" +.TP +.BR \-k ", " \-\-keep +Don't delete the input files. +.IP "" +Since +.B xz +5.2.6, +this option also makes +.B xz +compress or decompress even if the input is +a symbolic link to a regular file, +has more than one hard link, +or has the setuid, setgid, or sticky bit set. +The setuid, setgid, and sticky bits are not copied +to the target file. +In earlier versions this was only done with +.BR \-\-force . +.TP +.BR \-f ", " \-\-force +This option has several effects: +.RS +.IP \(bu 3 +If the target file already exists, +delete it before compressing or decompressing. +.IP \(bu 3 +Compress or decompress even if the input is +a symbolic link to a regular file, +has more than one hard link, +or has the setuid, setgid, or sticky bit set. +The setuid, setgid, and sticky bits are not copied +to the target file. +.IP \(bu 3 +When used with +.B \-\-decompress +.B \-\-stdout +and +.B xz +cannot recognize the type of the source file, +copy the source file as is to standard output. +This allows +.B xzcat +.B \-\-force +to be used like +.BR cat (1) +for files that have not been compressed with +.BR xz . +Note that in future, +.B xz +might support new compressed file formats, which may make +.B xz +decompress more types of files instead of copying them as is to +standard output. +.BI \-\-format= format +can be used to restrict +.B xz +to decompress only a single file format. +.RE +.TP +.BR \-c ", " \-\-stdout ", " \-\-to\-stdout +Write the compressed or decompressed data to +standard output instead of a file. +This implies +.BR \-\-keep . +.TP +.B \-\-single\-stream +Decompress only the first +.B .xz +stream, and +silently ignore possible remaining input data following the stream. +Normally such trailing garbage makes +.B xz +display an error. +.IP "" +.B xz +never decompresses more than one stream from +.B .lzma +files or raw streams, but this option still makes +.B xz +ignore the possible trailing data after the +.B .lzma +file or raw stream. +.IP "" +This option has no effect if the operation mode is not +.B \-\-decompress +or +.BR \-\-test . +.TP +.B \-\-no\-sparse +Disable creation of sparse files. +By default, if decompressing into a regular file, +.B xz +tries to make the file sparse if the decompressed data contains +long sequences of binary zeros. +It also works when writing to standard output +as long as standard output is connected to a regular file +and certain additional conditions are met to make it safe. +Creating sparse files may save disk space and speed up +the decompression by reducing the amount of disk I/O. +.TP +\fB\-S\fR \fI.suf\fR, \fB\-\-suffix=\fI.suf +When compressing, use +.I .suf +as the suffix for the target file instead of +.B .xz +or +.BR .lzma . +If not writing to standard output and +the source file already has the suffix +.IR .suf , +a warning is displayed and the file is skipped. +.IP "" +When decompressing, recognize files with the suffix +.I .suf +in addition to files with the +.BR .xz , +.BR .txz , +.BR .lzma , +.BR .tlz , +or +.B .lz +suffix. +If the source file has the suffix +.IR .suf , +the suffix is removed to get the target filename. +.IP "" +When compressing or decompressing raw streams +.RB ( \-\-format=raw ), +the suffix must always be specified unless +writing to standard output, +because there is no default suffix for raw streams. +.TP +\fB\-\-files\fR[\fB=\fIfile\fR] +Read the filenames to process from +.IR file ; +if +.I file +is omitted, filenames are read from standard input. +Filenames must be terminated with the newline character. +A dash +.RB ( \- ) +is taken as a regular filename; it doesn't mean standard input. +If filenames are given also as command line arguments, they are +processed before the filenames read from +.IR file . +.TP +\fB\-\-files0\fR[\fB=\fIfile\fR] +This is identical to \fB\-\-files\fR[\fB=\fIfile\fR] except +that each filename must be terminated with the null character. +. +.SS "Basic file format and compression options" +.TP +\fB\-F\fR \fIformat\fR, \fB\-\-format=\fIformat +Specify the file +.I format +to compress or decompress: +.RS +.TP +.B auto +This is the default. +When compressing, +.B auto +is equivalent to +.BR xz . +When decompressing, +the format of the input file is automatically detected. +Note that raw streams (created with +.BR \-\-format=raw ) +cannot be auto-detected. +.TP +.B xz +Compress to the +.B .xz +file format, or accept only +.B .xz +files when decompressing. +.TP +.BR lzma ", " alone +Compress to the legacy +.B .lzma +file format, or accept only +.B .lzma +files when decompressing. +The alternative name +.B alone +is provided for backwards compatibility with LZMA Utils. +.TP +.B lzip +Accept only +.B .lz +files when decompressing. +Compression is not supported. +.IP "" +The +.B .lz +format version 0 and the unextended version 1 are supported. +Version 0 files were produced by +.B lzip +1.3 and older. +Such files aren't common but may be found from file archives +as a few source packages were released in this format. +People might have old personal files in this format too. +Decompression support for the format version 0 was removed in +.B lzip +1.18. +.IP "" +.B lzip +1.4 and later create files in the format version 1. +The sync flush marker extension to the format version 1 was added in +.B lzip +1.6. +This extension is rarely used and isn't supported by +.B xz +(diagnosed as corrupt input). +.TP +.B raw +Compress or uncompress a raw stream (no headers). +This is meant for advanced users only. +To decode raw streams, you need use +.B \-\-format=raw +and explicitly specify the filter chain, +which normally would have been stored in the container headers. +.RE +.TP +\fB\-C\fR \fIcheck\fR, \fB\-\-check=\fIcheck +Specify the type of the integrity check. +The check is calculated from the uncompressed data and +stored in the +.B .xz +file. +This option has an effect only when compressing into the +.B .xz +format; the +.B .lzma +format doesn't support integrity checks. +The integrity check (if any) is verified when the +.B .xz +file is decompressed. +.IP "" +Supported +.I check +types: +.RS +.TP +.B none +Don't calculate an integrity check at all. +This is usually a bad idea. +This can be useful when integrity of the data is verified +by other means anyway. +.TP +.B crc32 +Calculate CRC32 using the polynomial from IEEE-802.3 (Ethernet). +.TP +.B crc64 +Calculate CRC64 using the polynomial from ECMA-182. +This is the default, since it is slightly better than CRC32 +at detecting damaged files and the speed difference is negligible. +.TP +.B sha256 +Calculate SHA-256. +This is somewhat slower than CRC32 and CRC64. +.RE +.IP "" +Integrity of the +.B .xz +headers is always verified with CRC32. +It is not possible to change or disable it. +.TP +.B \-\-ignore\-check +Don't verify the integrity check of the compressed data when decompressing. +The CRC32 values in the +.B .xz +headers will still be verified normally. +.IP "" +.B "Do not use this option unless you know what you are doing." +Possible reasons to use this option: +.RS +.IP \(bu 3 +Trying to recover data from a corrupt .xz file. +.IP \(bu 3 +Speeding up decompression. +This matters mostly with SHA-256 or +with files that have compressed extremely well. +It's recommended to not use this option for this purpose +unless the file integrity is verified externally in some other way. +.RE +.TP +.BR \-0 " ... " \-9 +Select a compression preset level. +The default is +.BR \-6 . +If multiple preset levels are specified, +the last one takes effect. +If a custom filter chain was already specified, setting +a compression preset level clears the custom filter chain. +.IP "" +The differences between the presets are more significant than with +.BR gzip (1) +and +.BR bzip2 (1). +The selected compression settings determine +the memory requirements of the decompressor, +thus using a too high preset level might make it painful +to decompress the file on an old system with little RAM. +Specifically, +.B "it's not a good idea to blindly use \-9 for everything" +like it often is with +.BR gzip (1) +and +.BR bzip2 (1). +.RS +.TP +.BR "\-0" " ... " "\-3" +These are somewhat fast presets. +.B \-0 +is sometimes faster than +.B "gzip \-9" +while compressing much better. +The higher ones often have speed comparable to +.BR bzip2 (1) +with comparable or better compression ratio, +although the results +depend a lot on the type of data being compressed. +.TP +.BR "\-4" " ... " "\-6" +Good to very good compression while keeping +decompressor memory usage reasonable even for old systems. +.B \-6 +is the default, which is usually a good choice +for distributing files that need to be decompressible +even on systems with only 16\ MiB RAM. +.RB ( \-5e +or +.B \-6e +may be worth considering too. +See +.BR \-\-extreme .) +.TP +.B "\-7 ... \-9" +These are like +.B \-6 +but with higher compressor and decompressor memory requirements. +These are useful only when compressing files bigger than +8\ MiB, 16\ MiB, and 32\ MiB, respectively. +.RE +.IP "" +On the same hardware, the decompression speed is approximately +a constant number of bytes of compressed data per second. +In other words, the better the compression, +the faster the decompression will usually be. +This also means that the amount of uncompressed output +produced per second can vary a lot. +.IP "" +The following table summarises the features of the presets: +.RS +.RS +.PP +.TS +tab(;); +c c c c c +n n n n n. +Preset;DictSize;CompCPU;CompMem;DecMem +\-0;256 KiB;0;3 MiB;1 MiB +\-1;1 MiB;1;9 MiB;2 MiB +\-2;2 MiB;2;17 MiB;3 MiB +\-3;4 MiB;3;32 MiB;5 MiB +\-4;4 MiB;4;48 MiB;5 MiB +\-5;8 MiB;5;94 MiB;9 MiB +\-6;8 MiB;6;94 MiB;9 MiB +\-7;16 MiB;6;186 MiB;17 MiB +\-8;32 MiB;6;370 MiB;33 MiB +\-9;64 MiB;6;674 MiB;65 MiB +.TE +.RE +.RE +.IP "" +Column descriptions: +.RS +.IP \(bu 3 +DictSize is the LZMA2 dictionary size. +It is waste of memory to use a dictionary bigger than +the size of the uncompressed file. +This is why it is good to avoid using the presets +.BR \-7 " ... " \-9 +when there's no real need for them. +At +.B \-6 +and lower, the amount of memory wasted is +usually low enough to not matter. +.IP \(bu 3 +CompCPU is a simplified representation of the LZMA2 settings +that affect compression speed. +The dictionary size affects speed too, +so while CompCPU is the same for levels +.BR \-6 " ... " \-9 , +higher levels still tend to be a little slower. +To get even slower and thus possibly better compression, see +.BR \-\-extreme . +.IP \(bu 3 +CompMem contains the compressor memory requirements +in the single-threaded mode. +It may vary slightly between +.B xz +versions. +Memory requirements of some of the future multithreaded modes may +be dramatically higher than that of the single-threaded mode. +.IP \(bu 3 +DecMem contains the decompressor memory requirements. +That is, the compression settings determine +the memory requirements of the decompressor. +The exact decompressor memory usage is slightly more than +the LZMA2 dictionary size, but the values in the table +have been rounded up to the next full MiB. +.RE +.TP +.BR \-e ", " \-\-extreme +Use a slower variant of the selected compression preset level +.RB ( \-0 " ... " \-9 ) +to hopefully get a little bit better compression ratio, +but with bad luck this can also make it worse. +Decompressor memory usage is not affected, +but compressor memory usage increases a little at preset levels +.BR \-0 " ... " \-3 . +.IP "" +Since there are two presets with dictionary sizes +4\ MiB and 8\ MiB, the presets +.B \-3e +and +.B \-5e +use slightly faster settings (lower CompCPU) than +.B \-4e +and +.BR \-6e , +respectively. +That way no two presets are identical. +.RS +.RS +.PP +.TS +tab(;); +c c c c c +n n n n n. +Preset;DictSize;CompCPU;CompMem;DecMem +\-0e;256 KiB;8;4 MiB;1 MiB +\-1e;1 MiB;8;13 MiB;2 MiB +\-2e;2 MiB;8;25 MiB;3 MiB +\-3e;4 MiB;7;48 MiB;5 MiB +\-4e;4 MiB;8;48 MiB;5 MiB +\-5e;8 MiB;7;94 MiB;9 MiB +\-6e;8 MiB;8;94 MiB;9 MiB +\-7e;16 MiB;8;186 MiB;17 MiB +\-8e;32 MiB;8;370 MiB;33 MiB +\-9e;64 MiB;8;674 MiB;65 MiB +.TE +.RE +.RE +.IP "" +For example, there are a total of four presets that use +8\ MiB dictionary, whose order from the fastest to the slowest is +.BR \-5 , +.BR \-6 , +.BR \-5e , +and +.BR \-6e . +.TP +.B \-\-fast +.PD 0 +.TP +.B \-\-best +.PD +These are somewhat misleading aliases for +.B \-0 +and +.BR \-9 , +respectively. +These are provided only for backwards compatibility +with LZMA Utils. +Avoid using these options. +.TP +.BI \-\-block\-size= size +When compressing to the +.B .xz +format, split the input data into blocks of +.I size +bytes. +The blocks are compressed independently from each other, +which helps with multi-threading and +makes limited random-access decompression possible. +This option is typically used to override the default +block size in multi-threaded mode, +but this option can be used in single-threaded mode too. +.IP "" +In multi-threaded mode about three times +.I size +bytes will be allocated in each thread for buffering input and output. +The default +.I size +is three times the LZMA2 dictionary size or 1 MiB, +whichever is more. +Typically a good value is 2\(en4 times +the size of the LZMA2 dictionary or at least 1 MiB. +Using +.I size +less than the LZMA2 dictionary size is waste of RAM +because then the LZMA2 dictionary buffer will never get fully used. +The sizes of the blocks are stored in the block headers, +which a future version of +.B xz +will use for multi-threaded decompression. +.IP "" +In single-threaded mode no block splitting is done by default. +Setting this option doesn't affect memory usage. +No size information is stored in block headers, +thus files created in single-threaded mode +won't be identical to files created in multi-threaded mode. +The lack of size information also means that a future version of +.B xz +won't be able decompress the files in multi-threaded mode. +.TP +.BI \-\-block\-list= sizes +When compressing to the +.B .xz +format, start a new block after +the given intervals of uncompressed data. +.IP "" +The uncompressed +.I sizes +of the blocks are specified as a comma-separated list. +Omitting a size (two or more consecutive commas) is a shorthand +to use the size of the previous block. +.IP "" +If the input file is bigger than the sum of +.IR sizes , +the last value in +.I sizes +is repeated until the end of the file. +A special value of +.B 0 +may be used as the last value to indicate that +the rest of the file should be encoded as a single block. +.IP "" +If one specifies +.I sizes +that exceed the encoder's block size +(either the default value in threaded mode or +the value specified with \fB\-\-block\-size=\fIsize\fR), +the encoder will create additional blocks while +keeping the boundaries specified in +.IR sizes . +For example, if one specifies +.B \-\-block\-size=10MiB +.B \-\-block\-list=5MiB,10MiB,8MiB,12MiB,24MiB +and the input file is 80 MiB, +one will get 11 blocks: +5, 10, 8, 10, 2, 10, 10, 4, 10, 10, and 1 MiB. +.IP "" +In multi-threaded mode the sizes of the blocks +are stored in the block headers. +This isn't done in single-threaded mode, +so the encoded output won't be +identical to that of the multi-threaded mode. +.TP +.BI \-\-flush\-timeout= timeout +When compressing, if more than +.I timeout +milliseconds (a positive integer) has passed since the previous flush and +reading more input would block, +all the pending input data is flushed from the encoder and +made available in the output stream. +This can be useful if +.B xz +is used to compress data that is streamed over a network. +Small +.I timeout +values make the data available at the receiving end +with a small delay, but large +.I timeout +values give better compression ratio. +.IP "" +This feature is disabled by default. +If this option is specified more than once, the last one takes effect. +The special +.I timeout +value of +.B 0 +can be used to explicitly disable this feature. +.IP "" +This feature is not available on non-POSIX systems. +.IP "" +.\" FIXME +.B "This feature is still experimental." +Currently +.B xz +is unsuitable for decompressing the stream in real time due to how +.B xz +does buffering. +.TP +.BI \-\-memlimit\-compress= limit +Set a memory usage limit for compression. +If this option is specified multiple times, +the last one takes effect. +.IP "" +If the compression settings exceed the +.IR limit , +.B xz +will attempt to adjust the settings downwards so that +the limit is no longer exceeded and display a notice that +automatic adjustment was done. +The adjustments are done in this order: +reducing the number of threads, +switching to single-threaded mode +if even one thread in multi-threaded mode exceeds the +.IR limit , +and finally reducing the LZMA2 dictionary size. +.IP "" +When compressing with +.B \-\-format=raw +or if +.B \-\-no\-adjust +has been specified, +only the number of threads may be reduced +since it can be done without affecting the compressed output. +.IP "" +If the +.I limit +cannot be met even with the adjustments described above, +an error is displayed and +.B xz +will exit with exit status 1. +.IP "" +The +.I limit +can be specified in multiple ways: +.RS +.IP \(bu 3 +The +.I limit +can be an absolute value in bytes. +Using an integer suffix like +.B MiB +can be useful. +Example: +.B "\-\-memlimit\-compress=80MiB" +.IP \(bu 3 +The +.I limit +can be specified as a percentage of total physical memory (RAM). +This can be useful especially when setting the +.B XZ_DEFAULTS +environment variable in a shell initialization script +that is shared between different computers. +That way the limit is automatically bigger +on systems with more memory. +Example: +.B "\-\-memlimit\-compress=70%" +.IP \(bu 3 +The +.I limit +can be reset back to its default value by setting it to +.BR 0 . +This is currently equivalent to setting the +.I limit +to +.B max +(no memory usage limit). +.RE +.IP "" +For 32-bit +.B xz +there is a special case: if the +.I limit +would be over +.BR "4020\ MiB" , +the +.I limit +is set to +.BR "4020\ MiB" . +On MIPS32 +.B "2000\ MiB" +is used instead. +(The values +.B 0 +and +.B max +aren't affected by this. +A similar feature doesn't exist for decompression.) +This can be helpful when a 32-bit executable has access +to 4\ GiB address space (2 GiB on MIPS32) +while hopefully doing no harm in other situations. +.IP "" +See also the section +.BR "Memory usage" . +.TP +.BI \-\-memlimit\-decompress= limit +Set a memory usage limit for decompression. +This also affects the +.B \-\-list +mode. +If the operation is not possible without exceeding the +.IR limit , +.B xz +will display an error and decompressing the file will fail. +See +.BI \-\-memlimit\-compress= limit +for possible ways to specify the +.IR limit . +.TP +.BI \-\-memlimit\-mt\-decompress= limit +Set a memory usage limit for multi-threaded decompression. +This can only affect the number of threads; +this will never make +.B xz +refuse to decompress a file. +If +.I limit +is too low to allow any multi-threading, the +.I limit +is ignored and +.B xz +will continue in single-threaded mode. +Note that if also +.B \-\-memlimit\-decompress +is used, +it will always apply to both single-threaded and multi-threaded modes, +and so the effective +.I limit +for multi-threading will never be higher than the limit set with +.BR \-\-memlimit\-decompress . +.IP "" +In contrast to the other memory usage limit options, +.BI \-\-memlimit\-mt\-decompress= limit +has a system-specific default +.IR limit . +.B "xz \-\-info\-memory" +can be used to see the current value. +.IP "" +This option and its default value exist +because without any limit the threaded decompressor +could end up allocating an insane amount of memory with some input files. +If the default +.I limit +is too low on your system, +feel free to increase the +.I limit +but never set it to a value larger than the amount of usable RAM +as with appropriate input files +.B xz +will attempt to use that amount of memory +even with a low number of threads. +Running out of memory or swapping +will not improve decompression performance. +.IP "" +See +.BI \-\-memlimit\-compress= limit +for possible ways to specify the +.IR limit . +Setting +.I limit +to +.B 0 +resets the +.I limit +to the default system-specific value. +.IP "" +.TP +\fB\-M\fR \fIlimit\fR, \fB\-\-memlimit=\fIlimit\fR, \fB\-\-memory=\fIlimit +This is equivalent to specifying +.BI \-\-memlimit\-compress= limit +.BI \-\-memlimit-decompress= limit +\fB\-\-memlimit\-mt\-decompress=\fIlimit\fR. +.TP +.B \-\-no\-adjust +Display an error and exit if the memory usage limit cannot be +met without adjusting settings that affect the compressed output. +That is, this prevents +.B xz +from switching the encoder from multi-threaded mode to single-threaded mode +and from reducing the LZMA2 dictionary size. +Even when this option is used the number of threads may be reduced +to meet the memory usage limit as that won't affect the compressed output. +.IP "" +Automatic adjusting is always disabled when creating raw streams +.RB ( \-\-format=raw ). +.TP +\fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads +Specify the number of worker threads to use. +Setting +.I threads +to a special value +.B 0 +makes +.B xz +use up to as many threads as the processor(s) on the system support. +The actual number of threads can be fewer than +.I threads +if the input file is not big enough +for threading with the given settings or +if using more threads would exceed the memory usage limit. +.IP "" +The single-threaded and multi-threaded compressors produce different output. +Single-threaded compressor will give the smallest file size but +only the output from the multi-threaded compressor can be decompressed +using multiple threads. +Setting +.I threads +to +.B 1 +will use the single-threaded mode. +Setting +.I threads +to any other value, including +.BR 0 , +will use the multi-threaded compressor +even if the system supports only one hardware thread. +.RB ( xz +5.2.x +used single-threaded mode in this situation.) +.IP "" +To use multi-threaded mode with only one thread, set +.I threads +to +.BR +1 . +The +.B + +prefix has no effect with values other than +.BR 1 . +A memory usage limit can still make +.B xz +switch to single-threaded mode unless +.B \-\-no\-adjust +is used. +Support for the +.B + +prefix was added in +.B xz +5.4.0. +.IP "" +If an automatic number of threads has been requested and +no memory usage limit has been specified, +then a system-specific default soft limit will be used to possibly +limit the number of threads. +It is a soft limit in sense that it is ignored +if the number of threads becomes one, +thus a soft limit will never stop +.B xz +from compressing or decompressing. +This default soft limit will not make +.B xz +switch from multi-threaded mode to single-threaded mode. +The active limits can be seen with +.BR "xz \-\-info\-memory" . +.IP "" +Currently the only threading method is to split the input into +blocks and compress them independently from each other. +The default block size depends on the compression level and +can be overridden with the +.BI \-\-block\-size= size +option. +.IP "" +Threaded decompression only works on files that contain +multiple blocks with size information in block headers. +All large enough files compressed in multi-threaded mode +meet this condition, +but files compressed in single-threaded mode don't even if +.BI \-\-block\-size= size +has been used. +. +.SS "Custom compressor filter chains" +A custom filter chain allows specifying +the compression settings in detail instead of relying on +the settings associated to the presets. +When a custom filter chain is specified, +preset options +.RB ( \-0 +\&...\& +.B \-9 +and +.BR \-\-extreme ) +earlier on the command line are forgotten. +If a preset option is specified +after one or more custom filter chain options, +the new preset takes effect and +the custom filter chain options specified earlier are forgotten. +.PP +A filter chain is comparable to piping on the command line. +When compressing, the uncompressed input goes to the first filter, +whose output goes to the next filter (if any). +The output of the last filter gets written to the compressed file. +The maximum number of filters in the chain is four, +but typically a filter chain has only one or two filters. +.PP +Many filters have limitations on where they can be +in the filter chain: +some filters can work only as the last filter in the chain, +some only as a non-last filter, and some work in any position +in the chain. +Depending on the filter, this limitation is either inherent to +the filter design or exists to prevent security issues. +.PP +A custom filter chain is specified by using one or more +filter options in the order they are wanted in the filter chain. +That is, the order of filter options is significant! +When decoding raw streams +.RB ( \-\-format=raw ), +the filter chain is specified in the same order as +it was specified when compressing. +.PP +Filters take filter-specific +.I options +as a comma-separated list. +Extra commas in +.I options +are ignored. +Every option has a default value, so you need to +specify only those you want to change. +.PP +To see the whole filter chain and +.IR options , +use +.B "xz \-vv" +(that is, use +.B \-\-verbose +twice). +This works also for viewing the filter chain options used by presets. +.TP +\fB\-\-lzma1\fR[\fB=\fIoptions\fR] +.PD 0 +.TP +\fB\-\-lzma2\fR[\fB=\fIoptions\fR] +.PD +Add LZMA1 or LZMA2 filter to the filter chain. +These filters can be used only as the last filter in the chain. +.IP "" +LZMA1 is a legacy filter, +which is supported almost solely due to the legacy +.B .lzma +file format, which supports only LZMA1. +LZMA2 is an updated +version of LZMA1 to fix some practical issues of LZMA1. +The +.B .xz +format uses LZMA2 and doesn't support LZMA1 at all. +Compression speed and ratios of LZMA1 and LZMA2 +are practically the same. +.IP "" +LZMA1 and LZMA2 share the same set of +.IR options : +.RS +.TP +.BI preset= preset +Reset all LZMA1 or LZMA2 +.I options +to +.IR preset . +.I Preset +consist of an integer, which may be followed by single-letter +preset modifiers. +The integer can be from +.B 0 +to +.BR 9 , +matching the command line options +.B \-0 +\&...\& +.BR \-9 . +The only supported modifier is currently +.BR e , +which matches +.BR \-\-extreme . +If no +.B preset +is specified, the default values of LZMA1 or LZMA2 +.I options +are taken from the preset +.BR 6 . +.TP +.BI dict= size +Dictionary (history buffer) +.I size +indicates how many bytes of the recently processed +uncompressed data is kept in memory. +The algorithm tries to find repeating byte sequences (matches) in +the uncompressed data, and replace them with references +to the data currently in the dictionary. +The bigger the dictionary, the higher is the chance +to find a match. +Thus, increasing dictionary +.I size +usually improves compression ratio, but +a dictionary bigger than the uncompressed file is waste of memory. +.IP "" +Typical dictionary +.I size +is from 64\ KiB to 64\ MiB. +The minimum is 4\ KiB. +The maximum for compression is currently 1.5\ GiB (1536\ MiB). +The decompressor already supports dictionaries up to +one byte less than 4\ GiB, which is the maximum for +the LZMA1 and LZMA2 stream formats. +.IP "" +Dictionary +.I size +and match finder +.RI ( mf ) +together determine the memory usage of the LZMA1 or LZMA2 encoder. +The same (or bigger) dictionary +.I size +is required for decompressing that was used when compressing, +thus the memory usage of the decoder is determined +by the dictionary size used when compressing. +The +.B .xz +headers store the dictionary +.I size +either as +.RI "2^" n +or +.RI "2^" n " + 2^(" n "\-1)," +so these +.I sizes +are somewhat preferred for compression. +Other +.I sizes +will get rounded up when stored in the +.B .xz +headers. +.TP +.BI lc= lc +Specify the number of literal context bits. +The minimum is 0 and the maximum is 4; the default is 3. +In addition, the sum of +.I lc +and +.I lp +must not exceed 4. +.IP "" +All bytes that cannot be encoded as matches +are encoded as literals. +That is, literals are simply 8-bit bytes +that are encoded one at a time. +.IP "" +The literal coding makes an assumption that the highest +.I lc +bits of the previous uncompressed byte correlate +with the next byte. +For example, in typical English text, an upper-case letter is +often followed by a lower-case letter, and a lower-case +letter is usually followed by another lower-case letter. +In the US-ASCII character set, the highest three bits are 010 +for upper-case letters and 011 for lower-case letters. +When +.I lc +is at least 3, the literal coding can take advantage of +this property in the uncompressed data. +.IP "" +The default value (3) is usually good. +If you want maximum compression, test +.BR lc=4 . +Sometimes it helps a little, and +sometimes it makes compression worse. +If it makes it worse, test +.B lc=2 +too. +.TP +.BI lp= lp +Specify the number of literal position bits. +The minimum is 0 and the maximum is 4; the default is 0. +.IP "" +.I Lp +affects what kind of alignment in the uncompressed data is +assumed when encoding literals. +See +.I pb +below for more information about alignment. +.TP +.BI pb= pb +Specify the number of position bits. +The minimum is 0 and the maximum is 4; the default is 2. +.IP "" +.I Pb +affects what kind of alignment in the uncompressed data is +assumed in general. +The default means four-byte alignment +.RI (2^ pb =2^2=4), +which is often a good choice when there's no better guess. +.IP "" +When the alignment is known, setting +.I pb +accordingly may reduce the file size a little. +For example, with text files having one-byte +alignment (US-ASCII, ISO-8859-*, UTF-8), setting +.B pb=0 +can improve compression slightly. +For UTF-16 text, +.B pb=1 +is a good choice. +If the alignment is an odd number like 3 bytes, +.B pb=0 +might be the best choice. +.IP "" +Even though the assumed alignment can be adjusted with +.I pb +and +.IR lp , +LZMA1 and LZMA2 still slightly favor 16-byte alignment. +It might be worth taking into account when designing file formats +that are likely to be often compressed with LZMA1 or LZMA2. +.TP +.BI mf= mf +Match finder has a major effect on encoder speed, +memory usage, and compression ratio. +Usually Hash Chain match finders are faster than Binary Tree +match finders. +The default depends on the +.IR preset : +0 uses +.BR hc3 , +1\(en3 +use +.BR hc4 , +and the rest use +.BR bt4 . +.IP "" +The following match finders are supported. +The memory usage formulas below are rough approximations, +which are closest to the reality when +.I dict +is a power of two. +.RS +.TP +.B hc3 +Hash Chain with 2- and 3-byte hashing +.br +Minimum value for +.IR nice : +3 +.br +Memory usage: +.br +.I dict +* 7.5 (if +.I dict +<= 16 MiB); +.br +.I dict +* 5.5 + 64 MiB (if +.I dict +> 16 MiB) +.TP +.B hc4 +Hash Chain with 2-, 3-, and 4-byte hashing +.br +Minimum value for +.IR nice : +4 +.br +Memory usage: +.br +.I dict +* 7.5 (if +.I dict +<= 32 MiB); +.br +.I dict +* 6.5 (if +.I dict +> 32 MiB) +.TP +.B bt2 +Binary Tree with 2-byte hashing +.br +Minimum value for +.IR nice : +2 +.br +Memory usage: +.I dict +* 9.5 +.TP +.B bt3 +Binary Tree with 2- and 3-byte hashing +.br +Minimum value for +.IR nice : +3 +.br +Memory usage: +.br +.I dict +* 11.5 (if +.I dict +<= 16 MiB); +.br +.I dict +* 9.5 + 64 MiB (if +.I dict +> 16 MiB) +.TP +.B bt4 +Binary Tree with 2-, 3-, and 4-byte hashing +.br +Minimum value for +.IR nice : +4 +.br +Memory usage: +.br +.I dict +* 11.5 (if +.I dict +<= 32 MiB); +.br +.I dict +* 10.5 (if +.I dict +> 32 MiB) +.RE +.TP +.BI mode= mode +Compression +.I mode +specifies the method to analyze +the data produced by the match finder. +Supported +.I modes +are +.B fast +and +.BR normal . +The default is +.B fast +for +.I presets +0\(en3 and +.B normal +for +.I presets +4\(en9. +.IP "" +Usually +.B fast +is used with Hash Chain match finders and +.B normal +with Binary Tree match finders. +This is also what the +.I presets +do. +.TP +.BI nice= nice +Specify what is considered to be a nice length for a match. +Once a match of at least +.I nice +bytes is found, the algorithm stops +looking for possibly better matches. +.IP "" +.I Nice +can be 2\(en273 bytes. +Higher values tend to give better compression ratio +at the expense of speed. +The default depends on the +.IR preset . +.TP +.BI depth= depth +Specify the maximum search depth in the match finder. +The default is the special value of 0, +which makes the compressor determine a reasonable +.I depth +from +.I mf +and +.IR nice . +.IP "" +Reasonable +.I depth +for Hash Chains is 4\(en100 and 16\(en1000 for Binary Trees. +Using very high values for +.I depth +can make the encoder extremely slow with some files. +Avoid setting the +.I depth +over 1000 unless you are prepared to interrupt +the compression in case it is taking far too long. +.RE +.IP "" +When decoding raw streams +.RB ( \-\-format=raw ), +LZMA2 needs only the dictionary +.IR size . +LZMA1 needs also +.IR lc , +.IR lp , +and +.IR pb . +.TP +\fB\-\-x86\fR[\fB=\fIoptions\fR] +.PD 0 +.TP +\fB\-\-arm\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-armthumb\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-arm64\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-powerpc\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-ia64\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-sparc\fR[\fB=\fIoptions\fR] +.PD +Add a branch/call/jump (BCJ) filter to the filter chain. +These filters can be used only as a non-last filter +in the filter chain. +.IP "" +A BCJ filter converts relative addresses in +the machine code to their absolute counterparts. +This doesn't change the size of the data +but it increases redundancy, +which can help LZMA2 to produce 0\(en15\ % smaller +.B .xz +file. +The BCJ filters are always reversible, +so using a BCJ filter for wrong type of data +doesn't cause any data loss, although it may make +the compression ratio slightly worse. +The BCJ filters are very fast and +use an insignificant amount of memory. +.IP "" +These BCJ filters have known problems related to +the compression ratio: +.RS +.IP \(bu 3 +Some types of files containing executable code +(for example, object files, static libraries, and Linux kernel modules) +have the addresses in the instructions filled with filler values. +These BCJ filters will still do the address conversion, +which will make the compression worse with these files. +.IP \(bu 3 +If a BCJ filter is applied on an archive, +it is possible that it makes the compression ratio +worse than not using a BCJ filter. +For example, if there are similar or even identical executables +then filtering will likely make the files less similar +and thus compression is worse. +The contents of non-executable files in the same archive can matter too. +In practice one has to try with and without a BCJ filter to see +which is better in each situation. +.RE +.IP "" +Different instruction sets have different alignment: +the executable file must be aligned to a multiple of +this value in the input data to make the filter work. +.RS +.RS +.PP +.TS +tab(;); +l n l +l n l. +Filter;Alignment;Notes +x86;1;32-bit or 64-bit x86 +ARM;4; +ARM-Thumb;2; +ARM64;4;4096-byte alignment is best +PowerPC;4;Big endian only +IA-64;16;Itanium +SPARC;4; +.TE +.RE +.RE +.IP "" +Since the BCJ-filtered data is usually compressed with LZMA2, +the compression ratio may be improved slightly if +the LZMA2 options are set to match the +alignment of the selected BCJ filter. +For example, with the IA-64 filter, it's good to set +.B pb=4 +or even +.B pb=4,lp=4,lc=0 +with LZMA2 (2^4=16). +The x86 filter is an exception; +it's usually good to stick to LZMA2's default +four-byte alignment when compressing x86 executables. +.IP "" +All BCJ filters support the same +.IR options : +.RS +.TP +.BI start= offset +Specify the start +.I offset +that is used when converting between relative +and absolute addresses. +The +.I offset +must be a multiple of the alignment of the filter +(see the table above). +The default is zero. +In practice, the default is good; specifying a custom +.I offset +is almost never useful. +.RE +.TP +\fB\-\-delta\fR[\fB=\fIoptions\fR] +Add the Delta filter to the filter chain. +The Delta filter can be only used as a non-last filter +in the filter chain. +.IP "" +Currently only simple byte-wise delta calculation is supported. +It can be useful when compressing, for example, uncompressed bitmap images +or uncompressed PCM audio. +However, special purpose algorithms may give significantly better +results than Delta + LZMA2. +This is true especially with audio, +which compresses faster and better, for example, with +.BR flac (1). +.IP "" +Supported +.IR options : +.RS +.TP +.BI dist= distance +Specify the +.I distance +of the delta calculation in bytes. +.I distance +must be 1\(en256. +The default is 1. +.IP "" +For example, with +.B dist=2 +and eight-byte input A1 B1 A2 B3 A3 B5 A4 B7, the output will be +A1 B1 01 02 01 02 01 02. +.RE +. +.SS "Other options" +.TP +.BR \-q ", " \-\-quiet +Suppress warnings and notices. +Specify this twice to suppress errors too. +This option has no effect on the exit status. +That is, even if a warning was suppressed, +the exit status to indicate a warning is still used. +.TP +.BR \-v ", " \-\-verbose +Be verbose. +If standard error is connected to a terminal, +.B xz +will display a progress indicator. +Specifying +.B \-\-verbose +twice will give even more verbose output. +.IP "" +The progress indicator shows the following information: +.RS +.IP \(bu 3 +Completion percentage is shown +if the size of the input file is known. +That is, the percentage cannot be shown in pipes. +.IP \(bu 3 +Amount of compressed data produced (compressing) +or consumed (decompressing). +.IP \(bu 3 +Amount of uncompressed data consumed (compressing) +or produced (decompressing). +.IP \(bu 3 +Compression ratio, which is calculated by dividing +the amount of compressed data processed so far by +the amount of uncompressed data processed so far. +.IP \(bu 3 +Compression or decompression speed. +This is measured as the amount of uncompressed data consumed +(compression) or produced (decompression) per second. +It is shown after a few seconds have passed since +.B xz +started processing the file. +.IP \(bu 3 +Elapsed time in the format M:SS or H:MM:SS. +.IP \(bu 3 +Estimated remaining time is shown +only when the size of the input file is +known and a couple of seconds have already passed since +.B xz +started processing the file. +The time is shown in a less precise format which +never has any colons, for example, 2 min 30 s. +.RE +.IP "" +When standard error is not a terminal, +.B \-\-verbose +will make +.B xz +print the filename, compressed size, uncompressed size, +compression ratio, and possibly also the speed and elapsed time +on a single line to standard error after compressing or +decompressing the file. +The speed and elapsed time are included only when +the operation took at least a few seconds. +If the operation didn't finish, for example, due to user interruption, +also the completion percentage is printed +if the size of the input file is known. +.TP +.BR \-Q ", " \-\-no\-warn +Don't set the exit status to 2 +even if a condition worth a warning was detected. +This option doesn't affect the verbosity level, thus both +.B \-\-quiet +and +.B \-\-no\-warn +have to be used to not display warnings and +to not alter the exit status. +.TP +.B \-\-robot +Print messages in a machine-parsable format. +This is intended to ease writing frontends that want to use +.B xz +instead of liblzma, which may be the case with various scripts. +The output with this option enabled is meant to be stable across +.B xz +releases. +See the section +.B "ROBOT MODE" +for details. +.TP +.B \-\-info\-memory +Display, in human-readable format, how much physical memory (RAM) +and how many processor threads +.B xz +thinks the system has and the memory usage limits for compression +and decompression, and exit successfully. +.TP +.BR \-h ", " \-\-help +Display a help message describing the most commonly used options, +and exit successfully. +.TP +.BR \-H ", " \-\-long\-help +Display a help message describing all features of +.BR xz , +and exit successfully +.TP +.BR \-V ", " \-\-version +Display the version number of +.B xz +and liblzma in human readable format. +To get machine-parsable output, specify +.B \-\-robot +before +.BR \-\-version . +. +.SH "ROBOT MODE" +The robot mode is activated with the +.B \-\-robot +option. +It makes the output of +.B xz +easier to parse by other programs. +Currently +.B \-\-robot +is supported only together with +.BR \-\-version , +.BR \-\-info\-memory , +and +.BR \-\-list . +It will be supported for compression and +decompression in the future. +. +.SS Version +.B "xz \-\-robot \-\-version" +will print the version number of +.B xz +and liblzma in the following format: +.PP +.BI XZ_VERSION= XYYYZZZS +.br +.BI LIBLZMA_VERSION= XYYYZZZS +.TP +.I X +Major version. +.TP +.I YYY +Minor version. +Even numbers are stable. +Odd numbers are alpha or beta versions. +.TP +.I ZZZ +Patch level for stable releases or +just a counter for development releases. +.TP +.I S +Stability. +0 is alpha, 1 is beta, and 2 is stable. +.I S +should be always 2 when +.I YYY +is even. +.PP +.I XYYYZZZS +are the same on both lines if +.B xz +and liblzma are from the same XZ Utils release. +.PP +Examples: 4.999.9beta is +.B 49990091 +and +5.0.0 is +.BR 50000002 . +. +.SS "Memory limit information" +.B "xz \-\-robot \-\-info\-memory" +prints a single line with three tab-separated columns: +.IP 1. 4 +Total amount of physical memory (RAM) in bytes. +.IP 2. 4 +Memory usage limit for compression in bytes +.RB ( \-\-memlimit\-compress ). +A special value of +.B 0 +indicates the default setting +which for single-threaded mode is the same as no limit. +.IP 3. 4 +Memory usage limit for decompression in bytes +.RB ( \-\-memlimit\-decompress ). +A special value of +.B 0 +indicates the default setting +which for single-threaded mode is the same as no limit. +.IP 4. 4 +Since +.B xz +5.3.4alpha: +Memory usage for multi-threaded decompression in bytes +.RB ( \-\-memlimit\-mt\-decompress ). +This is never zero because a system-specific default value +shown in the column 5 +is used if no limit has been specified explicitly. +This is also never greater than the value in the column 3 +even if a larger value has been specified with +.BR \-\-memlimit\-mt\-decompress . +.IP 5. 4 +Since +.B xz +5.3.4alpha: +A system-specific default memory usage limit +that is used to limit the number of threads +when compressing with an automatic number of threads +.RB ( \-\-threads=0 ) +and no memory usage limit has been specified +.RB ( \-\-memlimit\-compress ). +This is also used as the default value for +.BR \-\-memlimit\-mt\-decompress . +.IP 6. 4 +Since +.B xz +5.3.4alpha: +Number of available processor threads. +.PP +In the future, the output of +.B "xz \-\-robot \-\-info\-memory" +may have more columns, but never more than a single line. +. +.SS "List mode" +.B "xz \-\-robot \-\-list" +uses tab-separated output. +The first column of every line has a string +that indicates the type of the information found on that line: +.TP +.B name +This is always the first line when starting to list a file. +The second column on the line is the filename. +.TP +.B file +This line contains overall information about the +.B .xz +file. +This line is always printed after the +.B name +line. +.TP +.B stream +This line type is used only when +.B \-\-verbose +was specified. +There are as many +.B stream +lines as there are streams in the +.B .xz +file. +.TP +.B block +This line type is used only when +.B \-\-verbose +was specified. +There are as many +.B block +lines as there are blocks in the +.B .xz +file. +The +.B block +lines are shown after all the +.B stream +lines; different line types are not interleaved. +.TP +.B summary +This line type is used only when +.B \-\-verbose +was specified twice. +This line is printed after all +.B block +lines. +Like the +.B file +line, the +.B summary +line contains overall information about the +.B .xz +file. +.TP +.B totals +This line is always the very last line of the list output. +It shows the total counts and sizes. +.PP +The columns of the +.B file +lines: +.PD 0 +.RS +.IP 2. 4 +Number of streams in the file +.IP 3. 4 +Total number of blocks in the stream(s) +.IP 4. 4 +Compressed size of the file +.IP 5. 4 +Uncompressed size of the file +.IP 6. 4 +Compression ratio, for example, +.BR 0.123 . +If ratio is over 9.999, three dashes +.RB ( \-\-\- ) +are displayed instead of the ratio. +.IP 7. 4 +Comma-separated list of integrity check names. +The following strings are used for the known check types: +.BR None , +.BR CRC32 , +.BR CRC64 , +and +.BR SHA\-256 . +For unknown check types, +.BI Unknown\- N +is used, where +.I N +is the Check ID as a decimal number (one or two digits). +.IP 8. 4 +Total size of stream padding in the file +.RE +.PD +.PP +The columns of the +.B stream +lines: +.PD 0 +.RS +.IP 2. 4 +Stream number (the first stream is 1) +.IP 3. 4 +Number of blocks in the stream +.IP 4. 4 +Compressed start offset +.IP 5. 4 +Uncompressed start offset +.IP 6. 4 +Compressed size (does not include stream padding) +.IP 7. 4 +Uncompressed size +.IP 8. 4 +Compression ratio +.IP 9. 4 +Name of the integrity check +.IP 10. 4 +Size of stream padding +.RE +.PD +.PP +The columns of the +.B block +lines: +.PD 0 +.RS +.IP 2. 4 +Number of the stream containing this block +.IP 3. 4 +Block number relative to the beginning of the stream +(the first block is 1) +.IP 4. 4 +Block number relative to the beginning of the file +.IP 5. 4 +Compressed start offset relative to the beginning of the file +.IP 6. 4 +Uncompressed start offset relative to the beginning of the file +.IP 7. 4 +Total compressed size of the block (includes headers) +.IP 8. 4 +Uncompressed size +.IP 9. 4 +Compression ratio +.IP 10. 4 +Name of the integrity check +.RE +.PD +.PP +If +.B \-\-verbose +was specified twice, additional columns are included on the +.B block +lines. +These are not displayed with a single +.BR \-\-verbose , +because getting this information requires many seeks +and can thus be slow: +.PD 0 +.RS +.IP 11. 4 +Value of the integrity check in hexadecimal +.IP 12. 4 +Block header size +.IP 13. 4 +Block flags: +.B c +indicates that compressed size is present, and +.B u +indicates that uncompressed size is present. +If the flag is not set, a dash +.RB ( \- ) +is shown instead to keep the string length fixed. +New flags may be added to the end of the string in the future. +.IP 14. 4 +Size of the actual compressed data in the block (this excludes +the block header, block padding, and check fields) +.IP 15. 4 +Amount of memory (in bytes) required to decompress +this block with this +.B xz +version +.IP 16. 4 +Filter chain. +Note that most of the options used at compression time +cannot be known, because only the options +that are needed for decompression are stored in the +.B .xz +headers. +.RE +.PD +.PP +The columns of the +.B summary +lines: +.PD 0 +.RS +.IP 2. 4 +Amount of memory (in bytes) required to decompress +this file with this +.B xz +version +.IP 3. 4 +.B yes +or +.B no +indicating if all block headers have both compressed size and +uncompressed size stored in them +.PP +.I Since +.B xz +.I 5.1.2alpha: +.IP 4. 4 +Minimum +.B xz +version required to decompress the file +.RE +.PD +.PP +The columns of the +.B totals +line: +.PD 0 +.RS +.IP 2. 4 +Number of streams +.IP 3. 4 +Number of blocks +.IP 4. 4 +Compressed size +.IP 5. 4 +Uncompressed size +.IP 6. 4 +Average compression ratio +.IP 7. 4 +Comma-separated list of integrity check names +that were present in the files +.IP 8. 4 +Stream padding size +.IP 9. 4 +Number of files. +This is here to +keep the order of the earlier columns the same as on +.B file +lines. +.PD +.RE +.PP +If +.B \-\-verbose +was specified twice, additional columns are included on the +.B totals +line: +.PD 0 +.RS +.IP 10. 4 +Maximum amount of memory (in bytes) required to decompress +the files with this +.B xz +version +.IP 11. 4 +.B yes +or +.B no +indicating if all block headers have both compressed size and +uncompressed size stored in them +.PP +.I Since +.B xz +.I 5.1.2alpha: +.IP 12. 4 +Minimum +.B xz +version required to decompress the file +.RE +.PD +.PP +Future versions may add new line types and +new columns can be added to the existing line types, +but the existing columns won't be changed. +. +.SH "EXIT STATUS" +.TP +.B 0 +All is good. +.TP +.B 1 +An error occurred. +.TP +.B 2 +Something worth a warning occurred, +but no actual errors occurred. +.PP +Notices (not warnings or errors) printed on standard error +don't affect the exit status. +. +.SH ENVIRONMENT +.B xz +parses space-separated lists of options +from the environment variables +.B XZ_DEFAULTS +and +.BR XZ_OPT , +in this order, before parsing the options from the command line. +Note that only options are parsed from the environment variables; +all non-options are silently ignored. +Parsing is done with +.BR getopt_long (3) +which is used also for the command line arguments. +.TP +.B XZ_DEFAULTS +User-specific or system-wide default options. +Typically this is set in a shell initialization script to enable +.BR xz 's +memory usage limiter by default. +Excluding shell initialization scripts +and similar special cases, scripts must never set or unset +.BR XZ_DEFAULTS . +.TP +.B XZ_OPT +This is for passing options to +.B xz +when it is not possible to set the options directly on the +.B xz +command line. +This is the case when +.B xz +is run by a script or tool, for example, GNU +.BR tar (1): +.RS +.RS +.PP +.nf +.ft CW +XZ_OPT=\-2v tar caf foo.tar.xz foo +.ft R +.fi +.RE +.RE +.IP "" +Scripts may use +.BR XZ_OPT , +for example, to set script-specific default compression options. +It is still recommended to allow users to override +.B XZ_OPT +if that is reasonable. +For example, in +.BR sh (1) +scripts one may use something like this: +.RS +.RS +.PP +.nf +.ft CW +XZ_OPT=${XZ_OPT\-"\-7e"} +export XZ_OPT +.ft R +.fi +.RE +.RE +. +.SH "LZMA UTILS COMPATIBILITY" +The command line syntax of +.B xz +is practically a superset of +.BR lzma , +.BR unlzma , +and +.B lzcat +as found from LZMA Utils 4.32.x. +In most cases, it is possible to replace +LZMA Utils with XZ Utils without breaking existing scripts. +There are some incompatibilities though, +which may sometimes cause problems. +. +.SS "Compression preset levels" +The numbering of the compression level presets is not identical in +.B xz +and LZMA Utils. +The most important difference is how dictionary sizes +are mapped to different presets. +Dictionary size is roughly equal to the decompressor memory usage. +.RS +.PP +.TS +tab(;); +c c c +c n n. +Level;xz;LZMA Utils +\-0;256 KiB;N/A +\-1;1 MiB;64 KiB +\-2;2 MiB;1 MiB +\-3;4 MiB;512 KiB +\-4;4 MiB;1 MiB +\-5;8 MiB;2 MiB +\-6;8 MiB;4 MiB +\-7;16 MiB;8 MiB +\-8;32 MiB;16 MiB +\-9;64 MiB;32 MiB +.TE +.RE +.PP +The dictionary size differences affect +the compressor memory usage too, +but there are some other differences between +LZMA Utils and XZ Utils, which +make the difference even bigger: +.RS +.PP +.TS +tab(;); +c c c +c n n. +Level;xz;LZMA Utils 4.32.x +\-0;3 MiB;N/A +\-1;9 MiB;2 MiB +\-2;17 MiB;12 MiB +\-3;32 MiB;12 MiB +\-4;48 MiB;16 MiB +\-5;94 MiB;26 MiB +\-6;94 MiB;45 MiB +\-7;186 MiB;83 MiB +\-8;370 MiB;159 MiB +\-9;674 MiB;311 MiB +.TE +.RE +.PP +The default preset level in LZMA Utils is +.B \-7 +while in XZ Utils it is +.BR \-6 , +so both use an 8 MiB dictionary by default. +. +.SS "Streamed vs. non-streamed .lzma files" +The uncompressed size of the file can be stored in the +.B .lzma +header. +LZMA Utils does that when compressing regular files. +The alternative is to mark that uncompressed size is unknown +and use end-of-payload marker to indicate +where the decompressor should stop. +LZMA Utils uses this method when uncompressed size isn't known, +which is the case, for example, in pipes. +.PP +.B xz +supports decompressing +.B .lzma +files with or without end-of-payload marker, but all +.B .lzma +files created by +.B xz +will use end-of-payload marker and have uncompressed size +marked as unknown in the +.B .lzma +header. +This may be a problem in some uncommon situations. +For example, a +.B .lzma +decompressor in an embedded device might work +only with files that have known uncompressed size. +If you hit this problem, you need to use LZMA Utils +or LZMA SDK to create +.B .lzma +files with known uncompressed size. +. +.SS "Unsupported .lzma files" +The +.B .lzma +format allows +.I lc +values up to 8, and +.I lp +values up to 4. +LZMA Utils can decompress files with any +.I lc +and +.IR lp , +but always creates files with +.B lc=3 +and +.BR lp=0 . +Creating files with other +.I lc +and +.I lp +is possible with +.B xz +and with LZMA SDK. +.PP +The implementation of the LZMA1 filter in liblzma +requires that the sum of +.I lc +and +.I lp +must not exceed 4. +Thus, +.B .lzma +files, which exceed this limitation, cannot be decompressed with +.BR xz . +.PP +LZMA Utils creates only +.B .lzma +files which have a dictionary size of +.RI "2^" n +(a power of 2) but accepts files with any dictionary size. +liblzma accepts only +.B .lzma +files which have a dictionary size of +.RI "2^" n +or +.RI "2^" n " + 2^(" n "\-1)." +This is to decrease false positives when detecting +.B .lzma +files. +.PP +These limitations shouldn't be a problem in practice, +since practically all +.B .lzma +files have been compressed with settings that liblzma will accept. +. +.SS "Trailing garbage" +When decompressing, +LZMA Utils silently ignore everything after the first +.B .lzma +stream. +In most situations, this is a bug. +This also means that LZMA Utils +don't support decompressing concatenated +.B .lzma +files. +.PP +If there is data left after the first +.B .lzma +stream, +.B xz +considers the file to be corrupt unless +.B \-\-single\-stream +was used. +This may break obscure scripts which have +assumed that trailing garbage is ignored. +. +.SH NOTES +. +.SS "Compressed output may vary" +The exact compressed output produced from +the same uncompressed input file +may vary between XZ Utils versions even if +compression options are identical. +This is because the encoder can be improved +(faster or better compression) +without affecting the file format. +The output can vary even between different +builds of the same XZ Utils version, +if different build options are used. +.PP +The above means that once +.B \-\-rsyncable +has been implemented, +the resulting files won't necessarily be rsyncable +unless both old and new files have been compressed +with the same xz version. +This problem can be fixed if a part of the encoder +implementation is frozen to keep rsyncable output +stable across xz versions. +. +.SS "Embedded .xz decompressors" +Embedded +.B .xz +decompressor implementations like XZ Embedded don't necessarily +support files created with integrity +.I check +types other than +.B none +and +.BR crc32 . +Since the default is +.BR \-\-check=crc64 , +you must use +.B \-\-check=none +or +.B \-\-check=crc32 +when creating files for embedded systems. +.PP +Outside embedded systems, all +.B .xz +format decompressors support all the +.I check +types, or at least are able to decompress +the file without verifying the +integrity check if the particular +.I check +is not supported. +.PP +XZ Embedded supports BCJ filters, +but only with the default start offset. +. +.SH EXAMPLES +. +.SS Basics +Compress the file +.I foo +into +.I foo.xz +using the default compression level +.RB ( \-6 ), +and remove +.I foo +if compression is successful: +.RS +.PP +.nf +.ft CW +xz foo +.ft R +.fi +.RE +.PP +Decompress +.I bar.xz +into +.I bar +and don't remove +.I bar.xz +even if decompression is successful: +.RS +.PP +.nf +.ft CW +xz \-dk bar.xz +.ft R +.fi +.RE +.PP +Create +.I baz.tar.xz +with the preset +.B \-4e +.RB ( "\-4 \-\-extreme" ), +which is slower than the default +.BR \-6 , +but needs less memory for compression and decompression (48\ MiB +and 5\ MiB, respectively): +.RS +.PP +.nf +.ft CW +tar cf \- baz | xz \-4e > baz.tar.xz +.ft R +.fi +.RE +.PP +A mix of compressed and uncompressed files can be decompressed +to standard output with a single command: +.RS +.PP +.nf +.ft CW +xz \-dcf a.txt b.txt.xz c.txt d.txt.lzma > abcd.txt +.ft R +.fi +.RE +. +.SS "Parallel compression of many files" +On GNU and *BSD, +.BR find (1) +and +.BR xargs (1) +can be used to parallelize compression of many files: +.RS +.PP +.nf +.ft CW +find . \-type f \e! \-name '*.xz' \-print0 \e + | xargs \-0r \-P4 \-n16 xz \-T1 +.ft R +.fi +.RE +.PP +The +.B \-P +option to +.BR xargs (1) +sets the number of parallel +.B xz +processes. +The best value for the +.B \-n +option depends on how many files there are to be compressed. +If there are only a couple of files, +the value should probably be 1; +with tens of thousands of files, +100 or even more may be appropriate to reduce the number of +.B xz +processes that +.BR xargs (1) +will eventually create. +.PP +The option +.B \-T1 +for +.B xz +is there to force it to single-threaded mode, because +.BR xargs (1) +is used to control the amount of parallelization. +. +.SS "Robot mode" +Calculate how many bytes have been saved in total +after compressing multiple files: +.RS +.PP +.nf +.ft CW +xz \-\-robot \-\-list *.xz | awk '/^totals/{print $5\-$4}' +.ft R +.fi +.RE +.PP +A script may want to know that it is using new enough +.BR xz . +The following +.BR sh (1) +script checks that the version number of the +.B xz +tool is at least 5.0.0. +This method is compatible with old beta versions, +which didn't support the +.B \-\-robot +option: +.RS +.PP +.nf +.ft CW +if ! eval "$(xz \-\-robot \-\-version 2> /dev/null)" || + [ "$XZ_VERSION" \-lt 50000002 ]; then + echo "Your xz is too old." +fi +unset XZ_VERSION LIBLZMA_VERSION +.ft R +.fi +.RE +.PP +Set a memory usage limit for decompression using +.BR XZ_OPT , +but if a limit has already been set, don't increase it: +.RS +.PP +.nf +.ft CW +NEWLIM=$((123 << 20))\ \ # 123 MiB +OLDLIM=$(xz \-\-robot \-\-info\-memory | cut \-f3) +if [ $OLDLIM \-eq 0 \-o $OLDLIM \-gt $NEWLIM ]; then + XZ_OPT="$XZ_OPT \-\-memlimit\-decompress=$NEWLIM" + export XZ_OPT +fi +.ft R +.fi +.RE +. +.SS "Custom compressor filter chains" +The simplest use for custom filter chains is +customizing a LZMA2 preset. +This can be useful, +because the presets cover only a subset of the +potentially useful combinations of compression settings. +.PP +The CompCPU columns of the tables +from the descriptions of the options +.BR "\-0" " ... " "\-9" +and +.B \-\-extreme +are useful when customizing LZMA2 presets. +Here are the relevant parts collected from those two tables: +.RS +.PP +.TS +tab(;); +c c +n n. +Preset;CompCPU +\-0;0 +\-1;1 +\-2;2 +\-3;3 +\-4;4 +\-5;5 +\-6;6 +\-5e;7 +\-6e;8 +.TE +.RE +.PP +If you know that a file requires +somewhat big dictionary (for example, 32\ MiB) to compress well, +but you want to compress it quicker than +.B "xz \-8" +would do, a preset with a low CompCPU value (for example, 1) +can be modified to use a bigger dictionary: +.RS +.PP +.nf +.ft CW +xz \-\-lzma2=preset=1,dict=32MiB foo.tar +.ft R +.fi +.RE +.PP +With certain files, the above command may be faster than +.B "xz \-6" +while compressing significantly better. +However, it must be emphasized that only some files benefit from +a big dictionary while keeping the CompCPU value low. +The most obvious situation, +where a big dictionary can help a lot, +is an archive containing very similar files +of at least a few megabytes each. +The dictionary size has to be significantly bigger +than any individual file to allow LZMA2 to take +full advantage of the similarities between consecutive files. +.PP +If very high compressor and decompressor memory usage is fine, +and the file being compressed is +at least several hundred megabytes, it may be useful +to use an even bigger dictionary than the 64 MiB that +.B "xz \-9" +would use: +.RS +.PP +.nf +.ft CW +xz \-vv \-\-lzma2=dict=192MiB big_foo.tar +.ft R +.fi +.RE +.PP +Using +.B \-vv +.RB ( "\-\-verbose \-\-verbose" ) +like in the above example can be useful +to see the memory requirements +of the compressor and decompressor. +Remember that using a dictionary bigger than +the size of the uncompressed file is waste of memory, +so the above command isn't useful for small files. +.PP +Sometimes the compression time doesn't matter, +but the decompressor memory usage has to be kept low, for example, +to make it possible to decompress the file on an embedded system. +The following command uses +.B \-6e +.RB ( "\-6 \-\-extreme" ) +as a base and sets the dictionary to only 64\ KiB. +The resulting file can be decompressed with XZ Embedded +(that's why there is +.BR \-\-check=crc32 ) +using about 100\ KiB of memory. +.RS +.PP +.nf +.ft CW +xz \-\-check=crc32 \-\-lzma2=preset=6e,dict=64KiB foo +.ft R +.fi +.RE +.PP +If you want to squeeze out as many bytes as possible, +adjusting the number of literal context bits +.RI ( lc ) +and number of position bits +.RI ( pb ) +can sometimes help. +Adjusting the number of literal position bits +.RI ( lp ) +might help too, but usually +.I lc +and +.I pb +are more important. +For example, a source code archive contains mostly US-ASCII text, +so something like the following might give +slightly (like 0.1\ %) smaller file than +.B "xz \-6e" +(try also without +.BR lc=4 ): +.RS +.PP +.nf +.ft CW +xz \-\-lzma2=preset=6e,pb=0,lc=4 source_code.tar +.ft R +.fi +.RE +.PP +Using another filter together with LZMA2 can improve +compression with certain file types. +For example, to compress a x86-32 or x86-64 shared library +using the x86 BCJ filter: +.RS +.PP +.nf +.ft CW +xz \-\-x86 \-\-lzma2 libfoo.so +.ft R +.fi +.RE +.PP +Note that the order of the filter options is significant. +If +.B \-\-x86 +is specified after +.BR \-\-lzma2 , +.B xz +will give an error, +because there cannot be any filter after LZMA2, +and also because the x86 BCJ filter cannot be used +as the last filter in the chain. +.PP +The Delta filter together with LZMA2 +can give good results with bitmap images. +It should usually beat PNG, +which has a few more advanced filters than simple +delta but uses Deflate for the actual compression. +.PP +The image has to be saved in uncompressed format, +for example, as uncompressed TIFF. +The distance parameter of the Delta filter is set +to match the number of bytes per pixel in the image. +For example, 24-bit RGB bitmap needs +.BR dist=3 , +and it is also good to pass +.B pb=0 +to LZMA2 to accommodate the three-byte alignment: +.RS +.PP +.nf +.ft CW +xz \-\-delta=dist=3 \-\-lzma2=pb=0 foo.tiff +.ft R +.fi +.RE +.PP +If multiple images have been put into a single archive (for example, +.BR .tar ), +the Delta filter will work on that too as long as all images +have the same number of bytes per pixel. +. +.SH "SEE ALSO" +.BR xzdec (1), +.BR xzdiff (1), +.BR xzgrep (1), +.BR xzless (1), +.BR xzmore (1), +.BR gzip (1), +.BR bzip2 (1), +.BR 7z (1) +.PP +XZ Utils: <https://tukaani.org/xz/> +.br +XZ Embedded: <https://tukaani.org/xz/embedded.html> +.br +LZMA SDK: <http://7-zip.org/sdk.html> diff --git a/src/xz/xz_w32res.rc b/src/xz/xz_w32res.rc new file mode 100644 index 0000000..bad3020 --- /dev/null +++ b/src/xz/xz_w32res.rc @@ -0,0 +1,12 @@ +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#define MY_TYPE VFT_APP +#define MY_NAME "xz" +#define MY_SUFFIX ".exe" +#define MY_DESC "xz data compression tool for .xz and .lzma files" +#include "common_w32res.rc" |