diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-12-08 15:52:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-12-08 15:52:09 +0000 |
commit | 4568b11461f80bbc5fa0fb2c522205683ea14ec7 (patch) | |
tree | 58834b3aa970b99bc0a1b0fcb9ae5510ca149cb1 | |
parent | Adding upstream version 1.12~pre2. (diff) | |
download | zutils-4568b11461f80bbc5fa0fb2c522205683ea14ec7.tar.xz zutils-4568b11461f80bbc5fa0fb2c522205683ea14ec7.zip |
Adding upstream version 1.12~rc1. (upstream/1.12_rc1)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r-- | ChangeLog | 34 | ||||
-rw-r--r-- | INSTALL | 10 | ||||
-rw-r--r-- | Makefile.in | 10 | ||||
-rw-r--r-- | NEWS | 50 | ||||
-rw-r--r-- | README | 3 | ||||
-rwxr-xr-x | configure | 13 | ||||
-rw-r--r-- | doc/zcat.1 | 7 | ||||
-rw-r--r-- | doc/zcmp.1 | 26 | ||||
-rw-r--r-- | doc/zdiff.1 | 7 | ||||
-rw-r--r-- | doc/zgrep.1 | 7 | ||||
-rw-r--r-- | doc/ztest.1 | 29 | ||||
-rw-r--r-- | doc/zupdate.1 | 11 | ||||
-rw-r--r-- | doc/zutils.info | 366 | ||||
-rw-r--r-- | doc/zutils.texi | 424 | ||||
-rw-r--r-- | rc.cc | 88 | ||||
-rw-r--r-- | rc.h | 21 | ||||
-rw-r--r-- | recursive.cc | 2 | ||||
-rwxr-xr-x | testsuite/check.sh | 149 | ||||
-rw-r--r-- | testsuite/zcat_vs.dat | 4 | ||||
-rw-r--r-- | zcat.cc | 24 | ||||
-rw-r--r-- | zcmp.cc | 148 | ||||
-rw-r--r-- | zcmpdiff.cc | 12 | ||||
-rw-r--r-- | zdiff.cc | 55 | ||||
-rw-r--r-- | zgrep.cc | 40 | ||||
-rw-r--r-- | ztest.cc | 63 | ||||
-rw-r--r-- | zupdate.cc | 157 | ||||
-rw-r--r-- | zutils.cc | 14 | ||||
-rw-r--r-- | zutils.conf (renamed from zutilsrc) | 5 | ||||
-rw-r--r-- | zutils.h | 5 |
29 files changed, 1121 insertions, 663 deletions
@@ -1,9 +1,29 @@ +2022-12-05 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.12-rc1 released. + * zutilsrc: Rename to zutils.conf. Search for it in $XDG_CONFIG_HOME. + (Suggested by Adam Tuja). + * Allow '-O, --force-format' force also uncompressed format. + * zcmp.cc: New option '-H, --hexadecimal'. + Change long name of option '-s' to '--script'. + (Following a similar change made to GNU ed). + Assign short name '-q' to options '--quiet' and '--silent'. + Separate option '-l, --list' from '-v, --verbose'. + (cmp): Print byte and line in EOF message like GNU cmp. + * ztest.cc: Exit with status 2 if a file has wrong extension. + * zupdate.cc: New option '-d, --destdir'. + (zupdate_file): Pass '-q -s' to zcmp if verbosity < 0. + * rc.cc (show_version): Print the versions of the compressors used. + (show_option_error): New function showing argument and option name. + * zutils.texi: Document that format is detected by its magic bytes. + * check.sh: Test tarlz (if available) as compressor for zupdate. + 2022-04-12 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.12-pre2 released. * zgrep.cc: Accept option '-Z, --null'. (Reported by Leah Neukirchen). * zupdate.cc: New options '-e, --expand-extensions', - '-i, --ignore-errors'. + '-i, --ignore-errors'. ('-i' suggested by Antoni Sawicki). * Support compress'd (.Z) files through gzip in all utilities. 2022-03-06 Antonio Diaz Diaz <antonio@gnu.org> @@ -126,13 +146,13 @@ * Version 1.0 released. * Add new option '--format' to all utilities. - * main.cc (main): Make 'grep_show_name' tri-state so that file - name is no prefixed to output by default when searching one - file and '--recursive' has not been selected. + * main.cc (main): Make 'grep_show_name' tri-state so that file name + is not prefixed to the output by default when searching one file + and '--recursive' has not been selected. * zgrep.cc: Fix output of option '-L' (it behaved like '-l'). * zcmp.cc: Fix deadlock when option '-n' is used. 
- * zdiff.cc (set_data_feeder): Call compressor with option '-q' - only if verbosity < 0. + * zdiff.cc (set_data_feeder): Call compressor with option '-q' only + if verbosity < 0. * zutils.cc (set_data_feeder): Likewise. * Change quote characters in messages as advised by GNU Standards. * configure: Options now accept a separate argument. @@ -145,7 +165,7 @@ 2011-01-11 Antonio Diaz Diaz <ant_diaz@teleline.es> * Version 0.9 released. - * configure: New options 'DIFF' and 'GREP'. + * configure: New variables 'DIFF' and 'GREP'. * zcmp.cc: Fix deadlock when files differ. * zgrep.cc: Fix deadlock when binary file matches. @@ -1,6 +1,6 @@ Requirements ------------ -You will need a C++98 compiler with suport for 'long long'. +You will need a C++98 compiler with support for 'long long'. (gcc 3.3.6 or newer is recommended). I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards compliant compiler. @@ -32,8 +32,8 @@ Procedure or lzip -cd zutils[version].tar.lz | tar -xf - -This creates the directory ./zutils[version] containing the source from -the main archive. +This creates the directory ./zutils[version] containing the source code +extracted from the archive. 2. Change to zutils directory and run configure. (Try 'configure --help' for usage instructions). @@ -68,8 +68,8 @@ object files and executables to go and run the 'configure' script. 'configure' automatically checks for the source code in '.', in '..', and in the directory that 'configure' is in. -'configure' recognizes the option '--srcdir=DIR' to control where to -look for the sources. Usually 'configure' can determine that directory +'configure' recognizes the option '--srcdir=DIR' to control where to look +for the source code. Usually 'configure' can determine that directory automatically. 
After running 'configure', you can run 'make' and 'make install' as diff --git a/Makefile.in b/Makefile.in index d02d15d..fbd7102 100644 --- a/Makefile.in +++ b/Makefile.in @@ -84,7 +84,7 @@ doc : info man info : $(VPATH)/doc/$(pkgname).info $(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi - cd $(VPATH)/doc && makeinfo $(pkgname).texi + cd $(VPATH)/doc && $(MAKEINFO) $(pkgname).texi man : $(VPATH)/doc/zcat.1 $(VPATH)/doc/zcmp.1 $(VPATH)/doc/zdiff.1 \ $(VPATH)/doc/zgrep.1 $(VPATH)/doc/ztest.1 $(VPATH)/doc/zupdate.1 @@ -134,9 +134,9 @@ install-bin : all $(INSTALL_PROGRAM) ./zgrep "$(DESTDIR)$(bindir)/zgrep" $(INSTALL_PROGRAM) ./ztest "$(DESTDIR)$(bindir)/ztest" $(INSTALL_PROGRAM) ./zupdate "$(DESTDIR)$(bindir)/zupdate" - if [ ! -e "$(DESTDIR)$(sysconfdir)/$(pkgname)rc" ] ; then \ + if [ ! -e "$(DESTDIR)$(sysconfdir)/$(pkgname).conf" ] ; then \ if [ ! -d "$(DESTDIR)$(sysconfdir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(sysconfdir)" ; fi ; \ - $(INSTALL_DATA) $(VPATH)/$(pkgname)rc "$(DESTDIR)$(sysconfdir)/$(pkgname)rc" ; \ + $(INSTALL_DATA) $(VPATH)/$(pkgname).conf "$(DESTDIR)$(sysconfdir)/$(pkgname).conf" ; \ fi install-bin-strip : all @@ -187,7 +187,7 @@ uninstall-bin : -rm -f "$(DESTDIR)$(bindir)/zgrep" -rm -f "$(DESTDIR)$(bindir)/ztest" -rm -f "$(DESTDIR)$(bindir)/zupdate" - -rm -f "$(DESTDIR)$(sysconfdir)/$(pkgname)rc" + -rm -f "$(DESTDIR)$(sysconfdir)/$(pkgname).conf" uninstall-info : -if $(CAN_RUN_INSTALLINFO) ; then \ @@ -217,7 +217,7 @@ dist : doc $(DISTNAME)/doc/*.1 \ $(DISTNAME)/doc/$(pkgname).info \ $(DISTNAME)/doc/$(pkgname).texi \ - $(DISTNAME)/$(pkgname)rc \ + $(DISTNAME)/$(pkgname).conf \ $(DISTNAME)/*.h \ $(DISTNAME)/*.cc \ $(DISTNAME)/z*.in \ @@ -1,16 +1,64 @@ Changes in version 1.12: +The zutils configuration file 'zutilsrc' has been renamed to 'zutils.conf'. +Zutils now looks for the configuration file in $XDG_CONFIG_HOME/zutils.conf +instead of $HOME/.zutilsrc. (XDG_CONFIG_HOME defaults to $HOME/.config). +(Suggested by Adam Tuja). 
+ +In zcat, zcmp, zdiff, and zgrep, the option '-O, --force-format' now can +force also "uncompressed" format. + +zcmp now accepts the option '-H, --hexadecimal' to print byte values in +hexadecimal instead of octal. + +In zcmp: + The long name of option '-s' has been changed to '--script' following a + similar change made to GNU ed. + + The short name '-q' has been assigned to options '--quiet' and '--silent'. + + Option '-q' now only suppresses diagnostic messages written to stderr. + + Option '-s' now only suppresses messages about file differences written to + stdout or stderr. + + Option '-l, --list' is now different from option '-v, --verbose', which + now undoes the effect of '--quiet'. + +zcmp now prints byte and line in EOF message like GNU cmp: +"zcmp: EOF on FILE after byte B, in line L". + zgrep now also accepts the following options: '-G, --basic-regexp', '--label=<label>', '--line-buffered', '-P, --perl-regexp', '--silent', '-T, --initial-tab', '-U, --binary', and '-Z, --null'. (Reported by Chris Jamboretz and Leah Neukirchen). +ztest now exits with status 2 if an uncompressed file has a compressed file +name extension, or if a compressed file has a wrong compressed extension. + +zupdate now accepts option '-d, --destdir' to write recompressed files to +another directory. This allows, for example, recompressing files from a +read-only file system to another place without needing to copy or link them +to the destination directory first. + zupdate now accepts option '-e, --expand-extensions', which makes it expand combined file name extensions; tgz -> tar.lz. zupdate now also accepts option '-i, --ignore-errors', which makes it ignore -non-fatal errors. +non-fatal errors. (Suggested by Antoni Sawicki). All utilities now support compress'd (.Z) files through gzip. For this to work, the gzip program used (for example GNU gzip) must be able to decompress .Z files. 
+ +At verbosity level 1 (2 for zdiff and zgrep) or higher, '-V, --version' now +prints the versions of the compressors used (limited by option '-M, --format'). +(The compressors used must support option '-V' for this to work). + +Diagnostics caused by invalid arguments to command line options now show the +argument and the name of the option. + +It has been documented in the manual that the data format is detected by its +magic bytes, not by the file name extension. + +The testsuite now tests tarlz (if available) as compressor for zupdate. @@ -4,7 +4,8 @@ Zutils is a collection of utilities able to process any combination of compressed and uncompressed files transparently. If any file given, including standard input, is compressed, its decompressed content is used. Compressed files are decompressed on the fly; no temporary files are -created. +created. Data format is detected by its magic bytes, not by the file name +extension. These utilities are not wrapper scripts but safer and more efficient C++ programs. In particular the option '--recursive' is very efficient in @@ -6,7 +6,7 @@ # to copy, distribute, and modify it. pkgname=zutils -pkgversion=1.12-pre2 +pkgversion=1.12-rc1 srctrigger=doc/${pkgname}.texi # clear some things potentially inherited from environment. @@ -24,6 +24,7 @@ CXX=g++ CPPFLAGS= CXXFLAGS='-Wall -W -O2' LDFLAGS= +MAKEINFO=makeinfo DIFF=diff GREP=grep @@ -59,7 +60,7 @@ while [ $# != 0 ] ; do echo "Options and variables: [defaults in brackets]" echo " -h, --help display this help and exit" echo " -V, --version output version information and exit" - echo " --srcdir=DIR find the sources in DIR [. or ..]" + echo " --srcdir=DIR find the source code in DIR [. 
or ..]" echo " --prefix=DIR install into DIR [${prefix}]" echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]" echo " --bindir=DIR user executables directory [${bindir}]" @@ -72,6 +73,7 @@ while [ $# != 0 ] ; do echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]" echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS" echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" + echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]" echo " DIFF=NAME diff program to use with zdiff [${DIFF}]" echo " GREP=NAME grep program to use with zgrep [${GREP}]" echo @@ -103,6 +105,7 @@ while [ $# != 0 ] ; do CXXFLAGS=*) CXXFLAGS=${optarg} ;; CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;; LDFLAGS=*) LDFLAGS=${optarg} ;; + MAKEINFO=*) MAKEINFO=${optarg} ;; DIFF=*) DIFF=${optarg} ;; GREP=*) GREP=${optarg} ;; @@ -124,7 +127,7 @@ while [ $# != 0 ] ; do fi done -# Find the source files, if location was not specified. +# Find the source code, if location was not specified. srcdirtext= if [ -z "${srcdir}" ] ; then srcdirtext="or . or .." ; srcdir=. @@ -136,7 +139,7 @@ if [ -z "${srcdir}" ] ; then fi if [ ! -r "${srcdir}/${srctrigger}" ] ; then - echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: Can't find source code in ${srcdir} ${srcdirtext}" 1>&2 echo "configure: (At least ${srctrigger} is missing)." 1>&2 exit 1 fi @@ -174,6 +177,7 @@ echo "CXX = ${CXX}" echo "CPPFLAGS = ${CPPFLAGS}" echo "CXXFLAGS = ${CXXFLAGS}" echo "LDFLAGS = ${LDFLAGS}" +echo "MAKEINFO = ${MAKEINFO}" echo "DIFF = ${DIFF}" echo "GREP = ${GREP}" rm -f Makefile @@ -199,6 +203,7 @@ CXX = ${CXX} CPPFLAGS = ${CPPFLAGS} CXXFLAGS = ${CXXFLAGS} LDFLAGS = ${LDFLAGS} +MAKEINFO = ${MAKEINFO} DIFF = ${DIFF} GREP = ${GREP} EOF @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. 
-.TH ZCAT "1" "April 2022" "zutils 1.12-pre2" "User Commands" +.TH ZCAT "1" "December 2022" "zutils 1.12-rc1" "User Commands" .SH NAME zcat \- decompress and concatenate files to standard output .SH SYNOPSIS @@ -54,7 +54,7 @@ number all output lines don't read runtime configuration file .TP \fB\-O\fR, \fB\-\-force\-format=\fR<fmt> -force the format given (bz2, gz, lz, xz, zst) +force the input format .TP \fB\-q\fR, \fB\-\-quiet\fR suppress all messages @@ -94,6 +94,9 @@ set compressor and options for xz format .TP \fB\-\-zst=\fR<command> set compressor and options for zstd format +.PP +Valid formats for options '\-M' and '\-O' are 'bz2', 'gz', 'lz', 'xz', 'zst', +and 'un' for uncompressed. .SH "REPORTING BUGS" Report bugs to zutils\-bug@nongnu.org .br @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH ZCMP "1" "April 2022" "zutils 1.12-pre2" "User Commands" +.TH ZCMP "1" "December 2022" "zutils 1.12-rc1" "User Commands" .SH NAME zcmp \- decompress and compare two files byte by byte .SH SYNOPSIS @@ -37,6 +37,9 @@ output version information and exit \fB\-b\fR, \fB\-\-print\-bytes\fR print differing bytes .TP +\fB\-H\fR, \fB\-\-hexadecimal\fR +print hexadecimal values instead of octal +.TP \fB\-i\fR, \fB\-\-ignore\-initial=\fR<n>[:<n2>] ignore differences in the first <n> bytes .TP @@ -53,16 +56,16 @@ compare at most <n> bytes don't read runtime configuration file .TP \fB\-O\fR, \fB\-\-force\-format\fR=\fI\,[\/\fR<f1>][,<f2>] -force the formats given (bz2,gz,lz,xz,zst) +force one or both input formats .TP -\fB\-q\fR, \fB\-\-quiet\fR -suppress all messages +\fB\-q\fR, \fB\-\-quiet\fR, \fB\-\-silent\fR +suppress diagnostics written to stderr .TP -\fB\-s\fR, \fB\-\-silent\fR -(same as \fB\-\-quiet\fR) +\fB\-s\fR, \fB\-\-script\fR +suppress messages about file differences .TP \fB\-v\fR, \fB\-\-verbose\fR -verbose mode (same as \fB\-\-list\fR) +verbose mode (opposite of \fB\-\-quiet\fR) .TP \fB\-\-bz2=\fR<command> set compressor and 
options for bzip2 format @@ -79,8 +82,13 @@ set compressor and options for xz format \fB\-\-zst=\fR<command> set compressor and options for zstd format .PP -Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, -Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... +Valid formats for options '\-M' and '\-O' are 'bz2', 'gz', 'lz', 'xz', 'zst', +and 'un' for uncompressed. +.PP +Byte counts given as arguments to options may be expressed in decimal, +hexadecimal, or octal (using the same syntax as integer constants in C++), +and may be followed by a multiplier: k = kB = 10^3 = 1000, +Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc. .SH "REPORTING BUGS" Report bugs to zutils\-bug@nongnu.org .br diff --git a/doc/zdiff.1 b/doc/zdiff.1 index 55342c6..a5ff42b 100644 --- a/doc/zdiff.1 +++ b/doc/zdiff.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH ZDIFF "1" "April 2022" "zutils 1.12-pre2" "User Commands" +.TH ZDIFF "1" "December 2022" "zutils 1.12-rc1" "User Commands" .SH NAME zdiff \- decompress and compare two files line by line .SH SYNOPSIS @@ -68,7 +68,7 @@ process only the formats in <list> don't read runtime configuration file .TP \fB\-O\fR, \fB\-\-force\-format\fR=\fI\,[\/\fR<f1>][,<f2>] -force the formats given (bz2,gz,lz,xz,zst) +force one or both input formats .TP \fB\-p\fR, \fB\-\-show\-c\-function\fR show which C function each change is in @@ -117,6 +117,9 @@ set compressor and options for xz format .TP \fB\-\-zst=\fR<command> set compressor and options for zstd format +.PP +Valid formats for options '\-M' and '\-O' are 'bz2', 'gz', 'lz', 'xz', 'zst', +and 'un' for uncompressed. .SH "REPORTING BUGS" Report bugs to zutils\-bug@nongnu.org .br diff --git a/doc/zgrep.1 b/doc/zgrep.1 index bbe311c..79acdf2 100644 --- a/doc/zgrep.1 +++ b/doc/zgrep.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. 
-.TH ZGREP "1" "April 2022" "zutils 1.12-pre2" "User Commands" +.TH ZGREP "1" "December 2022" "zutils 1.12-rc1" "User Commands" .SH NAME zgrep \- search compressed files for a regular expression .SH SYNOPSIS @@ -112,7 +112,7 @@ don't read runtime configuration file show only the part of a line matching <pattern> .TP \fB\-O\fR, \fB\-\-force\-format=\fR<fmt> -force the format given (bz2, gz, lz, xz, zst) +force the input format .TP \fB\-P\fR, \fB\-\-perl\-regexp\fR <pattern> is a Perl regular expression @@ -165,6 +165,9 @@ set compressor and options for xz format \fB\-\-zst=\fR<command> set compressor and options for zstd format .PP +Valid formats for options '\-M' and '\-O' are 'bz2', 'gz', 'lz', 'xz', 'zst', +and 'un' for uncompressed. +.PP Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... .SH "REPORTING BUGS" diff --git a/doc/ztest.1 b/doc/ztest.1 index 2f51e51..5eb3bb4 100644 --- a/doc/ztest.1 +++ b/doc/ztest.1 @@ -1,19 +1,21 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH ZTEST "1" "April 2022" "zutils 1.12-pre2" "User Commands" +.TH ZTEST "1" "December 2022" "zutils 1.12-rc1" "User Commands" .SH NAME ztest \- verify the integrity of compressed files .SH SYNOPSIS .B ztest [\fI\,options\/\fR] [\fI\,files\/\fR] .SH DESCRIPTION -ztest verifies the integrity of the compressed files specified. -Uncompressed files are ignored. If a file is specified as '\-', the -integrity of compressed data read from standard input is verified. Data -read from standard input must be all in the same compressed format. If -a file fails to decompress, does not exist, can't be opened, or is a -terminal, ztest continues verifying the rest of the files. A final -diagnostic is shown at verbosity level 1 or higher if any file fails the -test when testing multiple files. +ztest verifies the integrity of the compressed files specified. 
It +also warns if an uncompressed file has a compressed file name extension, or +if a compressed file has a wrong compressed extension. Uncompressed files +are otherwise ignored. If a file is specified as '\-', the integrity of +compressed data read from standard input is verified. Data read from +standard input must be all in the same compressed format. If a file fails to +decompress, does not exist, can't be opened, or is a terminal, ztest +continues verifying the rest of the files. A final diagnostic is shown at +verbosity level 1 or higher if any file fails the test when testing multiple +files. .PP If no files are specified, recursive searches examine the current working directory, and nonrecursive searches read standard input. @@ -28,8 +30,9 @@ garbage output without issuing any warning. Therefore, xz files can't always be verified as reliably as files in the other formats can. .PP Exit status is 0 if all compressed files verify OK, 1 if environmental -problems (file not found, invalid flags, I/O errors, etc), 2 if any -compressed file is corrupt or invalid. +problems (file not found, invalid command line options, I/O errors, etc), +2 if any compressed file is corrupt or invalid, or if any file has an +incorrect file name extension. .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR @@ -45,7 +48,7 @@ process only the formats in <list> don't read runtime configuration file .TP \fB\-O\fR, \fB\-\-force\-format=\fR<fmt> -force the format given (bz2, gz, lz, xz, zst) +force the input format .TP \fB\-q\fR, \fB\-\-quiet\fR suppress all messages @@ -73,6 +76,8 @@ set compressor and options for xz format .TP \fB\-\-zst=\fR<command> set compressor and options for zstd format +.PP +Valid formats for options '\-M' and '\-O' are 'bz2', 'gz', 'lz', 'xz', and 'zst'. .SH "REPORTING BUGS" Report bugs to zutils\-bug@nongnu.org .br diff --git a/doc/zupdate.1 b/doc/zupdate.1 index 8519088..4a8c0b1 100644 --- a/doc/zupdate.1 +++ b/doc/zupdate.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! 
It was generated by help2man 1.47.16. -.TH ZUPDATE "1" "April 2022" "zutils 1.12-pre2" "User Commands" +.TH ZUPDATE "1" "December 2022" "zutils 1.12-rc1" "User Commands" .SH NAME zupdate \- recompress bzip2, gzip, xz, zstd files to lzip format .SH SYNOPSIS @@ -33,8 +33,8 @@ The names of the original files must have one of the following extensions: Exit status is 0 if all the compressed files were successfully recompressed (if needed), compared, and deleted (if requested). 1 if a non\-fatal error occurred (file not found or not regular, or has invalid format, or can't be -deleted). 2 if a fatal error occurred (compressor can't be run, or -comparison fails). +deleted). 2 if a fatal error occurred (invalid command line options, +compressor can't be run, or comparison fails). .SH OPTIONS .TP \fB\-h\fR, \fB\-\-help\fR @@ -43,6 +43,9 @@ display this help and exit \fB\-V\fR, \fB\-\-version\fR output version information and exit .TP +\fB\-d\fR, \fB\-\-destdir=\fR<dir> +write recompressed files into <dir> +.TP \fB\-e\fR, \fB\-\-expand\-extensions\fR expand combined extensions; tgz \-> tar.lz .TP @@ -93,6 +96,8 @@ set compressor and options for xz format .TP \fB\-\-zst=\fR<command> set compressor and options for zstd format +.PP +Valid formats for option '\-M' are 'bz2', 'gz', 'lz', 'xz', and 'zst'. .SH "REPORTING BUGS" Report bugs to zutils\-bug@nongnu.org .br diff --git a/doc/zutils.info b/doc/zutils.info index 9a9ca05..ea12cc3 100644 --- a/doc/zutils.info +++ b/doc/zutils.info @@ -11,13 +11,13 @@ File: zutils.info, Node: Top, Next: Introduction, Up: (dir) Zutils Manual ************* -This manual is for Zutils (version 1.12-pre2, 12 April 2022). +This manual is for Zutils (version 1.12-rc1, 5 December 2022). 
* Menu: * Introduction:: Purpose and features of zutils * Common options:: Options common to all utilities -* The zutilsrc file:: The zutils configuration file +* Configuration:: The configuration file zutils.conf * Zcat:: Concatenating compressed files * Zcmp:: Comparing compressed files byte by byte * Zdiff:: Comparing compressed files line by line @@ -43,20 +43,22 @@ Zutils is a collection of utilities able to process any combination of compressed and uncompressed files transparently. If any file given, including standard input, is compressed, its decompressed content is used. Compressed files are decompressed on the fly; no temporary files are -created. +created. Data format is detected by its magic bytes, not by the file name +extension. These utilities are not wrapper scripts but safer and more efficient C++ programs. In particular the option '--recursive' is very efficient in those utilities supporting it. -The utilities provided are zcat, zcmp, zdiff, zgrep, ztest, and zupdate. +The utilities provided are 'zcat', 'zcmp', 'zdiff', 'zgrep', 'ztest', and +'zupdate'. The formats supported are bzip2, gzip, lzip, xz, and zstd. Zutils uses external compressors. The compressor to be used for each format is configurable at runtime. - zcat, zcmp, zdiff, and zgrep are improved replacements for the shell -scripts provided by GNU gzip. ztest is unique to zutils. zupdate is similar -to gzip's znew. + 'zcat', 'zcmp', 'zdiff', and 'zgrep' are improved replacements for the +shell scripts provided by GNU gzip. 'ztest' is unique to zutils. 'zupdate' +is similar to gzip's znew. NOTE: Bzip2 and lzip provide well-defined values of exit status, which makes them safe to use with zutils. 
Gzip and xz may return ambiguous warning @@ -94,7 +96,7 @@ Z zettabyte (10^21) | Zi zebibyte (2^70) Y yottabyte (10^24) | Yi yobibyte (2^80) -File: zutils.info, Node: Common options, Next: The zutilsrc file, Prev: Introduction, Up: Top +File: zutils.info, Node: Common options, Next: Configuration, Prev: Introduction, Up: Top 2 Common options **************** @@ -106,14 +108,17 @@ here. *Note Argument syntax: (arg_parser)Argument syntax. '-h' '--help' Print an informative help message describing the options and exit. - zgrep only supports the '--help' form of this option. + 'zgrep' only supports the '--help' form of this option. '-V' '--version' Print the version number on the standard output and exit. This version - number should be included in all bug reports. In verbose mode, zdiff - and zgrep print also the version of the diff or grep program used - respectively. + number should be included in all bug reports. In verbose mode, 'zdiff' + and 'zgrep' print also the version of the diff or grep program used + respectively. At verbosity level 1 (2 for 'zdiff' and 'zgrep') or + higher, print also the versions of the compressors used (perhaps + limited by option '--format'). (The compressors used must support the + option '-V' for this to work). '-M FORMAT_LIST' '--format=FORMAT_LIST' @@ -138,20 +143,19 @@ here. *Note Argument syntax: (arg_parser)Argument syntax. '-N' '--no-rcfile' - Don't read the runtime configuration file 'zutilsrc'. + Don't read the runtime configuration file 'zutils.conf'. '--bz2=COMMAND' '--gz=COMMAND' '--lz=COMMAND' '--xz=COMMAND' '--zst=COMMAND' - Set program to be used as (de)compressor for the corresponding format. + Set program to be used as decompressor for the corresponding format. COMMAND may include arguments. For example '--lz='plzip --threads=2''. - The program set with '--lz' is used for both compression and - decompression. The others are used only for decompression. The name of - the program can't begin with '-'. 
These options override the values - set in 'zutilsrc'. The compression program used must meet three - requirements: + 'zupdate' uses '--lz' for compression, not for decompression (*note + lz-compressor::). The name of the program can't begin with '-'. These + options override the values set in 'zutils.conf'. The compression + program used must meet three requirements: 1. When called with the option '-d' and without file names, it must read compressed data from the standard input and produce @@ -165,21 +169,23 @@ here. *Note Argument syntax: (arg_parser)Argument syntax. -File: zutils.info, Node: The zutilsrc file, Next: Zcat, Prev: Common options, Up: Top +File: zutils.info, Node: Configuration, Next: Zcat, Prev: Common options, Up: Top -3 The zutils configuration file 'zutilsrc' -****************************************** +3 The configuration file 'zutils.conf' +************************************** -'zutilsrc' is the runtime configuration file for zutils. In it you may +'zutils.conf' is the runtime configuration file for zutils. In it you may define the compressor name and options to be used for each format. -'zutilsrc' is optional; you don't need to install it in order to run zutils. +'zutils.conf' is optional; you don't need to install it in order to run +zutils. The compressors specified in the command line override those specified -in 'zutilsrc'. +in 'zutils.conf'. - You may copy the system 'zutilsrc' file '${sysconfdir}/zutilsrc' to -'$HOME/.zutilsrc' and customize these options as you like. The file syntax -is fairly obvious (and there are further instructions in it): + You may copy the system 'zutils.conf' file '${sysconfdir}/zutils.conf' +to '$XDG_CONFIG_HOME/zutils.conf' and customize these options as you like. +('XDG_CONFIG_HOME' defaults to '$HOME/.config'). The file syntax is fairly +obvious (and there are further instructions in it): 1. Any line beginning with '#' is a comment line. 
@@ -189,17 +195,17 @@ is fairly obvious (and there are further instructions in it): where <format> is one of 'bz2', 'gz', 'lz', 'xz', or 'zst'. -File: zutils.info, Node: Zcat, Next: Zcmp, Prev: The zutilsrc file, Up: Top +File: zutils.info, Node: Zcat, Next: Zcmp, Prev: Configuration, Up: Top 4 Zcat ****** -zcat copies each FILE argument to standard output in sequence. If any file -given is compressed, its decompressed content is copied. If a file given -does not exist, and its name does not end with one of the known extensions, -zcat tries the compressed file names corresponding to the formats -supported. If a file fails to decompress, zcat continues copying the rest -of the files. +'zcat' copies each FILE argument to standard output in sequence. If any +file given is compressed, its decompressed content is copied. If a file +given does not exist, and its name does not end with one of the known +extensions, 'zcat' tries the compressed file names corresponding to the +formats supported. If a file fails to decompress, 'zcat' continues copying +the rest of the files. If a file is specified as '-', data are read from standard input, decompressed if needed, and sent to standard output. Data read from @@ -209,13 +215,13 @@ same compressed format. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches read standard input. - The format for running zcat is: + The format for running 'zcat' is: zcat [OPTIONS] [FILES] Exit status is 0 if no errors occurred, 1 otherwise. - zcat supports the following options: + 'zcat' supports the following options: '-A' '--show-all' @@ -240,10 +246,10 @@ Exit status is 0 if no errors occurred, 1 otherwise. '-O FORMAT' '--force-format=FORMAT' Force the compressed format given. Valid values for FORMAT are 'bz2', - 'gz', 'lz', 'xz', and 'zst'. 
If this option is used, the files are - passed to the corresponding decompressor without verifying their - format, and the exact file name must be given. Other names won't be - tried. + 'gz', 'lz', 'xz', 'zst', and 'un' for 'uncompressed'. If this option + is used, the files are passed to the corresponding decompressor (or + transmitted unmodified) without verifying their format, and the exact + file name must be given. Other names won't be tried. '-q' '--quiet' @@ -278,7 +284,8 @@ Exit status is 0 if no errors occurred, 1 otherwise. stands for "meta"). '--verbose' - Verbose mode. Show error messages. + Verbose mode. Show error messages. Repeating it increases the verbosity + level. *Note version::. @@ -287,19 +294,19 @@ File: zutils.info, Node: Zcmp, Next: Zdiff, Prev: Zcat, Up: Top 5 Zcmp ****** -zcmp compares two files and, if they differ, writes to standard output the -first byte and line number where they differ. Bytes and lines are numbered -starting with 1. A hyphen '-' used as a FILE argument means standard input. -If any file given is compressed, its decompressed content is used. -Compressed files are decompressed on the fly; no temporary files are -created. +'zcmp' compares two files and, if they differ, writes to standard output +the first byte and line number where they differ. Bytes and lines are +numbered starting with 1. A hyphen '-' used as a FILE argument means +standard input. If any file given is compressed, its decompressed content +is used. Compressed files are decompressed on the fly; no temporary files +are created. - The format for running zcmp is: + The format for running 'zcmp' is: zcmp [OPTIONS] FILE1 [FILE2] This compares FILE1 to FILE2. The standard input is used only if FILE1 or -FILE2 refers to standard input. If FILE2 is omitted zcmp tries the +FILE2 refers to standard input. 
If FILE2 is omitted 'zcmp' tries the following: - If FILE1 is compressed, compares its decompressed contents with the @@ -312,13 +319,19 @@ following: An exit status of 0 means no differences were found, 1 means some differences were found, and 2 means trouble. - zcmp supports the following options: + 'zcmp' supports the following options: '-b' '--print-bytes' - Print the differing bytes. Print control bytes as a '^' followed by a - letter, and precede bytes larger than 127 with 'M-' (which stands for - "meta"). + Print the values of the differing bytes (in octal by default) followed + by the bytes themselves in printable form. Print control bytes as a '^' + followed by a letter, and precede bytes larger than 127 with 'M-' + (which stands for "meta"). + +'-H' +'--hexadecimal' + Print the values of the differing bytes in hexadecimal instead of + octal. '-i SIZE' '--ignore-initial=SIZE' @@ -328,11 +341,9 @@ differences were found, and 2 means trouble. first input file and the first SIZE2 bytes of the second input file. '-l' -'-v' '--list' -'--verbose' - Print the byte numbers (in decimal) and values (in octal) of all - differing bytes. + Print the byte numbers (in decimal) and values (in octal by default) + of all differing bytes. Bytes are numbered starting with 1. '-n COUNT' '--bytes=COUNT' @@ -342,19 +353,50 @@ differences were found, and 2 means trouble. '--force-format=[FORMAT1][,FORMAT2]' Force the compressed formats given. Any of FORMAT1 or FORMAT2 may be omitted and the corresponding format will be automatically detected. - Valid values for FORMAT are 'bz2', 'gz', 'lz', 'xz', and 'zst'. If at - least one format is specified with this option, the file is passed to - the corresponding decompressor without verifying its format, and the - exact file names of both FILE1 and FILE2 must be given. Other names - won't be tried. + Valid values for FORMAT are 'bz2', 'gz', 'lz', 'xz', 'zst', and 'un' + for 'uncompressed'. 
If at least one format is specified with this + option, the file is passed to the corresponding decompressor (or + transmitted unmodified) without verifying its format, and the exact + file names of both FILE1 and FILE2 must be given. Other names won't be + tried. '-q' -'-s' '--quiet' '--silent' - Don't print anything; only return an exit status indicating whether the - files differ. + Suppress diagnostics written to standard error, even the + 'EOF on <name_of_shorter_file>' diagnostic. Byte differences are still + written to standard output. ('-q' produces no output except byte + differences). + +'-s' +'--script' + Write nothing to standard output or standard error when files differ, + not even the 'EOF on <name_of_shorter_file>' diagnostic; indicate + differing files through exit status only. Diagnostic messages are still + written to standard error when an error is encountered. ('-s' produces + no output except error messages). +'-v' +'--verbose' + Verbose mode. Undoes the effect of '--quiet'. Further -v's increase + the verbosity level. *Note version::. + + + Byte counts given as arguments to options may be expressed in decimal, +hexadecimal, or octal (using the same syntax as integer constants in C++), +and may be followed by a multiplier and an optional 'B' for "byte". + + Table of SI and binary prefixes (unit multipliers): + +Prefix Value | Prefix Value +k kilobyte (10^3 = 1000) | Ki kibibyte (2^10 = 1024) +M megabyte (10^6) | Mi mebibyte (2^20) +G gigabyte (10^9) | Gi gibibyte (2^30) +T terabyte (10^12) | Ti tebibyte (2^40) +P petabyte (10^15) | Pi pebibyte (2^50) +E exabyte (10^18) | Ei exbibyte (2^60) +Z zettabyte (10^21) | Zi zebibyte (2^70) +Y yottabyte (10^24) | Yi yobibyte (2^80) File: zutils.info, Node: Zdiff, Next: Zgrep, Prev: Zcmp, Up: Top @@ -362,19 +404,19 @@ File: zutils.info, Node: Zdiff, Next: Zgrep, Prev: Zcmp, Up: Top 6 Zdiff ******* -zdiff compares two files and, if they differ, writes to standard output the -differences line by line. 
A hyphen '-' used as a FILE argument means +'zdiff' compares two files and, if they differ, writes to standard output +the differences line by line. A hyphen '-' used as a FILE argument means standard input. If any file given is compressed, its decompressed content -is used. zdiff is a front end to the program diff and has the limitation +is used. 'zdiff' is a front end to the program diff and has the limitation that messages from diff refer to temporary file names instead of those specified. - The format for running zdiff is: + The format for running 'zdiff' is: zdiff [OPTIONS] FILE1 [FILE2] This compares FILE1 to FILE2. The standard input is used only if FILE1 or -FILE2 refers to standard input. If FILE2 is omitted zdiff tries the +FILE2 refers to standard input. If FILE2 is omitted 'zdiff' tries the following: - If FILE1 is compressed, compares its decompressed contents with the @@ -387,8 +429,8 @@ following: An exit status of 0 means no differences were found, 1 means some differences were found, and 2 means trouble. - zdiff supports the following options (some options only work if the diff -program used supports them): + 'zdiff' supports the following options (some options only work if the +diff program used supports them): '-a' '--text' @@ -425,11 +467,12 @@ program used supports them): '--force-format=[FORMAT1][,FORMAT2]' Force the compressed formats given. Any of FORMAT1 or FORMAT2 may be omitted and the corresponding format will be automatically detected. - Valid values for FORMAT are 'bz2', 'gz', 'lz', 'xz', and 'zst'. If at - least one format is specified with this option, the file is passed to - the corresponding decompressor without verifying its format, and the - exact file names of both FILE1 and FILE2 must be given. Other names - won't be tried. + Valid values for FORMAT are 'bz2', 'gz', 'lz', 'xz', 'zst', and 'un' + for 'uncompressed'. 
If at least one format is specified with this + option, the file is passed to the corresponding decompressor (or + transmitted unmodified) without verifying its format, and the exact + file names of both FILE1 and FILE2 must be given. Other names won't be + tried. '-p' '--show-c-function' @@ -461,7 +504,8 @@ program used supports them): '-v' '--verbose' When specified before '--version', print the version of the diff - program used. + program used. Further -v's increase the verbosity level. *Note + version::. '-w' '--ignore-all-space' @@ -483,12 +527,13 @@ File: zutils.info, Node: Zgrep, Next: Ztest, Prev: Zdiff, Up: Top 7 Zgrep ******* -zgrep is a front end to the program grep that allows transparent search on -any combination of compressed and uncompressed files. If any file given is -compressed, its decompressed content is used. If a file given does not -exist, and its name does not end with one of the known extensions, zgrep +'zgrep' is a front end to the program grep that allows transparent search +on any combination of compressed and uncompressed files. If any file given +is compressed, its decompressed content is used. If a file given does not +exist, and its name does not end with one of the known extensions, 'zgrep' tries the compressed file names corresponding to the formats supported. If -a file fails to decompress, zgrep continues searching the rest of the files. +a file fails to decompress, 'zgrep' continues searching the rest of the +files. If a file is specified as '-', data are read from standard input, decompressed if needed, and fed to grep. Data read from standard input must @@ -497,16 +542,23 @@ be of the same type; all uncompressed or all in the same compressed format. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches read standard input. - The format for running zgrep is: + For efficiency reasons, 'zgrep' does not always read all its input. 
For +example, the shell command 'base64 -d foo | zgrep -q X' can cause 'zgrep' +to exit immediately after reading a line containing 'X', without bothering +to read the rest of its input data. This in turn can cause base64 to exit +with a nonzero status because base64 cannot write to its output pipe after +'zgrep' exits. + + The format for running 'zgrep' is: zgrep [OPTIONS] PATTERN [FILES] An exit status of 0 means at least one match was found, 1 means no matches were found, and 2 means trouble. - zgrep supports the following options (Some options only work if the grep -program used supports them. Options -h, -H, -r, -R, and -Z are managed by -zgrep and not passed to grep): + 'zgrep' supports the following options (Some options only work if the +grep program used supports them. Options -h, -H, -r, -R, and -Z are managed +by 'zgrep' and not passed to grep): '-a' '--text' @@ -577,11 +629,13 @@ zgrep and not passed to grep): '-l' '--files-with-matches' - Only print names of files containing at least one match. + Only print names of files containing at least one match. Stop reading + each file on the first match. '-L' '--files-without-match' - Only print names of files not containing any matches. + Only print names of files not containing any matches. Stop reading + each file on the first match. Note: option -L fails (prints wrong results, returns wrong status, and even hangs) when using GNU grep versions 3.2 to 3.4 inclusive because of a wrong change in the exit status of grep, which was reverted in @@ -609,10 +663,10 @@ zgrep and not passed to grep): '-O FORMAT' '--force-format=FORMAT' Force the compressed format given. Valid values for FORMAT are 'bz2', - 'gz', 'lz', 'xz', and 'zst'. If this option is used, the files are - passed to the corresponding decompressor without verifying their - format, and the exact file name must be given. Other names won't be - tried. + 'gz', 'lz', 'xz', 'zst', and 'un' for 'uncompressed'. 
If this option + is used, the files are passed to the corresponding decompressor (or + transmitted unmodified) without verifying their format, and the exact + file name must be given. Other names won't be tried. '-P' '--perl-regexp' @@ -655,7 +709,8 @@ zgrep and not passed to grep): '--verbose' Verbose mode. Show error messages. When specified before '--version', - print the version of the grep program used. + print the version of the grep program used. Repeating it increases the + verbosity level. *Note version::. '-w' '--word-regexp' @@ -680,14 +735,16 @@ File: zutils.info, Node: Ztest, Next: Zupdate, Prev: Zgrep, Up: Top 8 Ztest ******* -ztest verifies the integrity of the compressed files specified. -Uncompressed files are ignored. If a file is specified as '-', the -integrity of compressed data read from standard input is verified. Data -read from standard input must be all in the same compressed format. If a -file fails to decompress, does not exist, can't be opened, or is a -terminal, ztest continues verifying the rest of the files. A final -diagnostic is shown at verbosity level 1 or higher if any file fails the -test when testing multiple files. +'ztest' verifies the integrity of the compressed files specified. It also +warns if an uncompressed file has a compressed file name extension, or if a +compressed file has a wrong compressed extension. Uncompressed files are +otherwise ignored. If a file is specified as '-', the integrity of +compressed data read from standard input is verified. Data read from +standard input must be all in the same compressed format. If a file fails to +decompress, does not exist, can't be opened, or is a terminal, 'ztest' +continues verifying the rest of the files. A final diagnostic is shown at +verbosity level 1 or higher if any file fails the test when testing multiple +files. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches read standard input. 
@@ -703,15 +760,16 @@ of the xz format specification allows xz decompressors to produce garbage output without issuing any warning. Therefore, xz files can't always be verified as reliably as files in the other formats can. - The format for running ztest is: + The format for running 'ztest' is: ztest [OPTIONS] [FILES] -The exit status is 0 if all compressed files verify OK, 1 if environmental -problems (file not found, invalid flags, I/O errors, etc), 2 if any -compressed file is corrupt or invalid. +Exit status is 0 if all compressed files verify OK, 1 if environmental +problems occurred (file not found, invalid command line options, I/O errors, etc.), 2 +if any compressed file is corrupt or invalid, or if any file has an +incorrect file name extension. - ztest supports the following options: + 'ztest' supports the following options: '-O FORMAT' '--force-format=FORMAT' @@ -738,8 +796,8 @@ compressed file is corrupt or invalid. '-v' '--verbose' - Verbose mode. Show the verify status for each file processed. - Further -v's increase the verbosity level. + Verbose mode. Show the verify status for each file processed. Further + -v's increase the verbosity level. *Note version::. @@ -748,12 +806,12 @@ File: zutils.info, Node: Zupdate, Next: Problems, Prev: Ztest, Up: Top 9 Zupdate ********* -zupdate recompresses files from bzip2, gzip, xz, and zstd formats to lzip +'zupdate' recompresses files from bzip2, gzip, xz, and zstd formats to lzip format. Each original is compared with the new file and then deleted. Only regular files with standard file name extensions are recompressed, other files are ignored. Compressed files are decompressed and then recompressed on the fly; no temporary files are created. If an error happens while -recompressing a file, zupdate exits immediately without recompressing the +recompressing a file, 'zupdate' exits immediately without recompressing the rest of the files. 
The lzip format is chosen as destination because it is the most appropriate for long-term data archiving. @@ -763,7 +821,7 @@ directory, and nonrecursive searches do nothing. If the lzip compressed version of a file already exists, the file is skipped unless the option '--force' is given. In this case, if the comparison with the existing lzip version fails, an error is returned and -the original file is not deleted. The operation of zupdate is meant to be +the original file is not deleted. The operation of 'zupdate' is meant to be safe and not cause any data loss. Therefore, existing lzip compressed files are never overwritten nor deleted. @@ -787,23 +845,40 @@ recompressing Slackware packages, for example. If the decompressor for the xz or zstd formats is not found, the corresponding files are ignored. - Recompressing a file is much like copying or moving it. Therefore zupdate -preserves the access and modification dates, permissions, and, if you have -appropriate privileges, ownership of the file just as 'cp -p' does. (If the -user ID or the group ID can't be duplicated, the file permission bits -S_ISUID and S_ISGID are cleared). + Recompressing a file is much like copying or moving it. Therefore +'zupdate' preserves the access and modification dates, permissions, and, if +you have appropriate privileges, ownership of the file just as 'cp -p' +does. (If the user ID or the group ID can't be duplicated, the file +permission bits S_ISUID and S_ISGID are cleared). - The format for running zupdate is: + The format for running 'zupdate' is: zupdate [OPTIONS] [FILES] Exit status is 0 if all the compressed files were successfully recompressed (if needed), compared, and deleted (if requested). 1 if a non-fatal error occurred (file not found or not regular, or has invalid format, or can't be -deleted). 2 if a fatal error occurred (compressor can't be run, or -comparison fails). - - zupdate supports the following options: +deleted). 
2 if a fatal error occurred (invalid command line options, +compressor can't be run, or comparison fails). + + 'zupdate' supports the following options: + +'-d DIR' +'--destdir=DIR' + Write recompressed files to another directory, using DIR as base + directory, instead of writing them in the same directory as the + original files. In recursive mode, this is done by replacing each + directory specified in the command line with DIR to produce the + recompressed file names. For example, 'zupdate -r -d DIR ../a' + recompresses a file named '../a/b/c.gz' to 'DIR/b/c.lz'. Regular files + specified in the command line are recompressed directly into DIR. For + example, 'zupdate -d DIR ../a/b/c.gz' writes the recompressed file to + 'DIR/c.lz'. + + This option allows recompressing files from a read-only file system to + another place without the need to copy or link them to the destination + directory first. (Remember to use option '--keep' when recompressing + read-only files to avoid warnings about files that can't be deleted). '-e' '--expand-extensions' @@ -851,13 +926,26 @@ comparison fails). '-v' '--verbose' Verbose mode. Show the files being processed. A second '-v' also shows - the files being ignored. + the files being ignored and increases the verbosity level. *Note + version::. '-0 .. -9' - Set the compression level of lzip. By default zupdate passes '-9' to + Set the compression level of lzip. By default 'zupdate' passes '-9' to lzip. Custom compression options can be passed to lzip with the option '--lz'. For example '--lz='lzip -9 -s64MiB''. +'--lz=COMMAND' + Set compression command. COMMAND may include arguments. For example + '--lz='plzip --threads=2''. The name of the program can't begin with + '-'. This option overrides the value set in 'zutils.conf'. 
The + compression program used does not need to implement decompression + (*note compressor-requirements::), but it must implement at least the + compression level option '-9' and the option '-o FILE' to write the + compressed output to FILE. tarlz meets these requirements, and + therefore can be used to recompress POSIX tar archives by using a + command like 'zupdate --lz='tarlz -9 -z --no-solid' archive.tar.gz'. + *Note tarlz manual: (tarlz)Top. + File: zutils.info, Node: Problems, Next: Concept index, Prev: Zupdate, Up: Top @@ -893,24 +981,26 @@ Concept index * zgrep: Zgrep. (line 6) * ztest: Ztest. (line 6) * zupdate: Zupdate. (line 6) -* zutilsrc: The zutilsrc file. (line 6) +* zutils.conf: Configuration. (line 6) Tag Table: Node: Top217 -Node: Introduction1150 -Node: Common options3897 -Ref: compressor-requirements6134 -Node: The zutilsrc file6529 -Node: Zcat7497 -Node: Zcmp10072 -Node: Zdiff12573 -Node: Zgrep15623 -Node: Ztest21115 -Node: Zupdate23681 -Node: Problems28191 -Node: Concept index28725 +Node: Introduction1156 +Node: Common options4003 +Ref: version4489 +Ref: compressor-requirements6440 +Node: Configuration6835 +Node: Zcat7868 +Node: Zcmp10568 +Node: Zdiff14825 +Node: Zgrep18008 +Node: Ztest24116 +Node: Zupdate26915 +Ref: lz-compressor32442 +Node: Problems33143 +Node: Concept index33677 End Tag Table diff --git a/doc/zutils.texi b/doc/zutils.texi index 6324814..459d38d 100644 --- a/doc/zutils.texi +++ b/doc/zutils.texi @@ -6,8 +6,8 @@ @finalout @c %**end of header -@set UPDATED 12 April 2022 -@set VERSION 1.12-pre2 +@set UPDATED 5 December 2022 +@set VERSION 1.12-rc1 @dircategory Compression @direntry @@ -38,7 +38,7 @@ This manual is for Zutils (version @value{VERSION}, @value{UPDATED}). 
@menu * Introduction:: Purpose and features of zutils * Common options:: Options common to all utilities -* The zutilsrc file:: The zutils configuration file +* Configuration:: The configuration file zutils.conf * Zcat:: Concatenating compressed files * Zcmp:: Comparing compressed files byte by byte * Zdiff:: Comparing compressed files line by line @@ -66,21 +66,25 @@ is a collection of utilities able to process any combination of compressed and uncompressed files transparently. If any file given, including standard input, is compressed, its decompressed content is used. Compressed files are decompressed on the fly; no temporary files are -created. +created. Data format is detected by its magic bytes, not by the file name +extension. These utilities are not wrapper scripts but safer and more efficient C++ -programs. In particular the option @samp{--recursive} is very efficient in +programs. In particular the option @option{--recursive} is very efficient in those utilities supporting it. @noindent -The utilities provided are zcat, zcmp, zdiff, zgrep, ztest, and zupdate.@* -The formats supported are bzip2, gzip, lzip, xz, and zstd.@* +The utilities provided are @command{zcat}, @command{zcmp}, @command{zdiff}, +@command{zgrep}, @command{ztest}, and @command{zupdate}.@* +The formats supported are bzip2, gzip, +@uref{http://www.nongnu.org/lzip/lzip.html,,lzip}, xz, and zstd.@* Zutils uses external compressors. The compressor to be used for each format is configurable at runtime. -zcat, zcmp, zdiff, and zgrep are improved replacements for the shell scripts -provided by GNU gzip. ztest is unique to zutils. zupdate is similar to -gzip's znew. +@command{zcat}, @command{zcmp}, @command{zdiff}, and @command{zgrep} are +improved replacements for the shell scripts provided by GNU gzip. +@command{ztest} is unique to zutils. @command{zupdate} is similar to gzip's +znew. NOTE: Bzip2 and lzip provide well-defined values of exit status, which makes them safe to use with zutils. 
Gzip and xz may return ambiguous warning @@ -88,7 +92,7 @@ values, making them less reliable back ends for zutils. Zstd currently does not even document its exit status in its man page. @xref{compressor-requirements}. -FORMAT NOTE 1: The option @samp{--format} allows the processing of a subset +FORMAT NOTE 1: The option @option{--format} allows the processing of a subset of formats in recursive mode and when trying compressed file names. For example, use the following command to search for the string @samp{foo} in gzip and lzip files only: @@ -136,15 +140,19 @@ descriptions for each of the programs, they are described here. @table @code @item -h @itemx --help -Print an informative help message describing the options and exit. zgrep -only supports the @samp{--help} form of this option. +Print an informative help message describing the options and exit. +@command{zgrep} only supports the @option{--help} form of this option. +@anchor{version} @item -V @itemx --version Print the version number on the standard output and exit. This version number should be included in all bug reports. -In verbose mode, zdiff and zgrep print also the version of the diff or grep -program used respectively. +In verbose mode, @command{zdiff} and @command{zgrep} print also the version +of the diff or grep program used respectively. At verbosity level 1 (2 for +@command{zdiff} and @command{zgrep}) or higher, print also the versions of +the compressors used (perhaps limited by option @option{--format}). (The +compressors used must support the option @option{-V} for this to work). @item -M @var{format_list} @itemx --format=@var{format_list} @@ -171,29 +179,29 @@ extensions: @item -N @itemx --no-rcfile -Don't read the runtime configuration file @samp{zutilsrc}. +Don't read the runtime configuration file @file{zutils.conf}. 
@item --bz2=@var{command} @itemx --gz=@var{command} @itemx --lz=@var{command} @itemx --xz=@var{command} @itemx --zst=@var{command} -Set program to be used as (de)compressor for the corresponding format. +Set program to be used as decompressor for the corresponding format. @var{command} may include arguments. For example -@w{@samp{--lz='plzip --threads=2'}}. The program set with @samp{--lz} is -used for both compression and decompression. The others are used only for -decompression. The name of the program can't begin with @samp{-}. These -options override the values set in @file{zutilsrc}. The compression program -used must meet three requirements: +@w{@option{--lz='plzip --threads=2'}}. @command{zupdate} uses @option{--lz} +for compression, not for decompression (@pxref{lz-compressor}). The name of +the program can't begin with @samp{-}. These options override the values set +in @file{zutils.conf}. The compression program used must meet three +requirements: @anchor{compressor-requirements} @enumerate @item -When called with the option @samp{-d} and without file names, it must read +When called with the option @option{-d} and without file names, it must read compressed data from the standard input and produce decompressed data on the standard output. @item -If the option @samp{-q} is passed to zutils, the compression program must +If the option @option{-q} is passed to zutils, the compression program must also accept it. @item It must return 0 if no errors occurred, and a non-zero value otherwise. @@ -202,21 +210,22 @@ It must return 0 if no errors occurred, and a non-zero value otherwise. @end table -@node The zutilsrc file -@chapter The zutils configuration file 'zutilsrc' -@cindex zutilsrc +@node Configuration +@chapter The configuration file 'zutils.conf' +@cindex zutils.conf -@file{zutilsrc} is the runtime configuration file for zutils. In it you +@file{zutils.conf} is the runtime configuration file for zutils. 
In it you may define the compressor name and options to be used for each format. -@file{zutilsrc} is optional; you don't need to install it in order to run +@file{zutils.conf} is optional; you don't need to install it in order to run zutils. The compressors specified in the command line override those specified -in @file{zutilsrc}. +in @file{zutils.conf}. -You may copy the system @file{zutilsrc} file @file{$@{sysconfdir@}/zutilsrc} -to @file{$HOME/.zutilsrc} and customize these options as you like. The file -syntax is fairly obvious (and there are further instructions in it): +You may copy the system @file{zutils.conf} file @file{$@{sysconfdir@}/zutils.conf} +to @file{$XDG_CONFIG_HOME/zutils.conf} and customize these options as you like. +(@env{XDG_CONFIG_HOME} defaults to @file{$HOME/.config}). The file syntax is +fairly obvious (and there are further instructions in it): @enumerate @item @@ -236,12 +245,12 @@ where <format> is one of @samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, or @chapter Zcat @cindex zcat -zcat copies each @var{file} argument to standard output in sequence. If any -file given is compressed, its decompressed content is copied. If a file -given does not exist, and its name does not end with one of the known -extensions, zcat tries the compressed file names corresponding to the -formats supported. If a file fails to decompress, zcat continues copying the -rest of the files. +@command{zcat} copies each @var{file} argument to standard output in +sequence. If any file given is compressed, its decompressed content is +copied. If a file given does not exist, and its name does not end with one +of the known extensions, @command{zcat} tries the compressed file names +corresponding to the formats supported. If a file fails to decompress, +@command{zcat} continues copying the rest of the files. If a file is specified as @samp{-}, data are read from standard input, decompressed if needed, and sent to standard output. 
Data read from @@ -251,7 +260,7 @@ same compressed format. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches read standard input. -The format for running zcat is: +The format for running @command{zcat} is: @example zcat [@var{options}] [@var{files}] @@ -260,12 +269,12 @@ zcat [@var{options}] [@var{files}] @noindent Exit status is 0 if no errors occurred, 1 otherwise. -zcat supports the following options: +@command{zcat} supports the following options: @table @code @item -A @itemx --show-all -Equivalent to @samp{-vET}. +Equivalent to @option{-vET}. @item -b @itemx --number-nonblank @@ -273,7 +282,7 @@ Number all nonblank output lines, starting with 1. The line count is unlimited. @item -e -Equivalent to @samp{-vE}. +Equivalent to @option{-vE}. @item -E @itemx --show-ends @@ -286,10 +295,11 @@ Number all output lines, starting with 1. The line count is unlimited. @item -O @var{format} @itemx --force-format=@var{format} Force the compressed format given. Valid values for @var{format} are -@samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, and @samp{zst}. If this option -is used, the files are passed to the corresponding decompressor without -verifying their format, and the exact file name must be given. Other names -won't be tried. +@samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, @samp{zst}, and @samp{un} for +@samp{uncompressed}. If this option is used, the files are passed to the +corresponding decompressor (or transmitted unmodified) without verifying +their format, and the exact file name must be given. Other names won't be +tried. @item -q @itemx --quiet @@ -311,7 +321,7 @@ recursively, following all symbolic links. Replace multiple adjacent blank lines with a single blank line. @item -t -Equivalent to @samp{-vT}. +Equivalent to @option{-vT}. @item -T @itemx --show-tabs @@ -324,7 +334,8 @@ notation and precede characters larger than 127 with @samp{M-} (which stands for "meta"). @item --verbose -Verbose mode. 
Show error messages. +Verbose mode. Show error messages. Repeating it increases the verbosity +level. @xref{version}. @end table @@ -333,14 +344,14 @@ Verbose mode. Show error messages. @chapter Zcmp @cindex zcmp -zcmp compares two files and, if they differ, writes to standard output the -first byte and line number where they differ. Bytes and lines are numbered -starting with 1. A hyphen @samp{-} used as a @var{file} argument means -standard input. If any file given is compressed, its decompressed content is -used. Compressed files are decompressed on the fly; no temporary files are -created. +@command{zcmp} compares two files and, if they differ, writes to standard +output the first byte and line number where they differ. Bytes and lines are +numbered starting with 1. A hyphen @samp{-} used as a @var{file} argument +means standard input. If any file given is compressed, its decompressed +content is used. Compressed files are decompressed on the fly; no temporary +files are created. -The format for running zcmp is: +The format for running @command{zcmp} is: @example zcmp [@var{options}] @var{file1} [@var{file2}] @@ -349,7 +360,7 @@ zcmp [@var{options}] @var{file1} [@var{file2}] @noindent This compares @var{file1} to @var{file2}. The standard input is used only if @var{file1} or @var{file2} refers to standard input. If @var{file2} is -omitted zcmp tries the following: +omitted @command{zcmp} tries the following: @itemize - @item @@ -365,14 +376,19 @@ contents of @var{file1}.[lz|bz2|gz|zst|xz] (the first one that is found). An exit status of 0 means no differences were found, 1 means some differences were found, and 2 means trouble. -zcmp supports the following options: +@command{zcmp} supports the following options: @table @code @item -b @itemx --print-bytes -Print the differing bytes. Print control bytes as a @samp{^} followed by -a letter, and precede bytes larger than 127 with @samp{M-} (which stands -for "meta"). 
+Print the values of the differing bytes (in octal by default) followed by +the bytes themselves in printable form. Print control bytes as a @samp{^} +followed by a letter, and precede bytes larger than 127 with @samp{M-} +(which stands for "meta"). + +@item -H +@itemx --hexadecimal +Print the values of the differing bytes in hexadecimal instead of octal. @item -i @var{size} @itemx --ignore-initial=@var{size} @@ -383,11 +399,9 @@ first @var{size1} bytes of the first input file and the first @var{size2} bytes of the second input file. @item -l -@itemx -v @itemx --list -@itemx --verbose -Print the byte numbers (in decimal) and values (in octal) of all -differing bytes. +Print the byte numbers (in decimal) and values (in octal by default) of all +differing bytes. Bytes are numbered starting with 1. @item -n @var{count} @itemx --bytes=@var{count} @@ -398,33 +412,66 @@ Compare at most @var{count} input bytes. Force the compressed formats given. Any of @var{format1} or @var{format2} may be omitted and the corresponding format will be automatically detected. Valid values for @var{format} are @samp{bz2}, @samp{gz}, @samp{lz}, -@samp{xz}, and @samp{zst}. If at least one format is specified with this -option, the file is passed to the corresponding decompressor without -verifying its format, and the exact file names of both @var{file1} and -@var{file2} must be given. Other names won't be tried. +@samp{xz}, @samp{zst}, and @samp{un} for @samp{uncompressed}. If at least +one format is specified with this option, the file is passed to the +corresponding decompressor (or transmitted unmodified) without verifying its +format, and the exact file names of both @var{file1} and @var{file2} must be +given. Other names won't be tried. @item -q -@itemx -s @itemx --quiet @itemx --silent -Don't print anything; only return an exit status indicating whether the -files differ. +Suppress diagnostics written to standard error, even the +@w{@samp{EOF on <name_of_shorter_file>}} diagnostic. 
Byte differences are +still written to standard output. (@option{-q} produces no output except +byte differences). + +@item -s +@itemx --script +Write nothing to standard output or standard error when files differ, not +even the @w{@samp{EOF on <name_of_shorter_file>}} diagnostic; indicate +differing files through exit status only. Diagnostic messages are still +written to standard error when an error is encountered. (@option{-s} +produces no output except error messages). + +@item -v +@itemx --verbose +Verbose mode. Undoes the effect of @option{--quiet}. Further -v's increase +the verbosity level. @xref{version}. @end table +Byte counts given as arguments to options may be expressed in decimal, +hexadecimal, or octal (using the same syntax as integer constants in C++), +and may be followed by a multiplier and an optional @samp{B} for "byte". + +Table of SI and binary prefixes (unit multipliers): + +@multitable {Prefix} {kilobyte (10^3 = 1000)} {|} {Prefix} {kibibyte (2^10 = 1024)} +@item Prefix @tab Value @tab | @tab Prefix @tab Value +@item k @tab kilobyte (10^3 = 1000) @tab | @tab Ki @tab kibibyte (2^10 = 1024) +@item M @tab megabyte (10^6) @tab | @tab Mi @tab mebibyte (2^20) +@item G @tab gigabyte (10^9) @tab | @tab Gi @tab gibibyte (2^30) +@item T @tab terabyte (10^12) @tab | @tab Ti @tab tebibyte (2^40) +@item P @tab petabyte (10^15) @tab | @tab Pi @tab pebibyte (2^50) +@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60) +@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70) +@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) +@end multitable + @node Zdiff @chapter Zdiff @cindex zdiff -zdiff compares two files and, if they differ, writes to standard output the -differences line by line. A hyphen @samp{-} used as a @var{file} argument -means standard input. If any file given is compressed, its decompressed -content is used. 
zdiff is a front end to the program diff and has the -limitation that messages from diff refer to temporary file names instead of -those specified. +@command{zdiff} compares two files and, if they differ, writes to standard +output the differences line by line. A hyphen @samp{-} used as a @var{file} +argument means standard input. If any file given is compressed, its +decompressed content is used. @command{zdiff} is a front end to the program +diff and has the limitation that messages from diff refer to temporary file +names instead of those specified. -The format for running zdiff is: +The format for running @command{zdiff} is: @example zdiff [@var{options}] @var{file1} [@var{file2}] @@ -433,7 +480,7 @@ zdiff [@var{options}] @var{file1} [@var{file2}] @noindent This compares @var{file1} to @var{file2}. The standard input is used only if @var{file1} or @var{file2} refers to standard input. If @var{file2} is -omitted zdiff tries the following: +omitted @command{zdiff} tries the following: @itemize - @item @@ -449,8 +496,8 @@ contents of @var{file1}.[lz|bz2|gz|zst|xz] (the first one that is found). An exit status of 0 means no differences were found, 1 means some differences were found, and 2 means trouble. -zdiff supports the following options (some options only work if the diff -program used supports them): +@command{zdiff} supports the following options (some options only work if +the diff program used supports them): @table @code @item -a @@ -465,7 +512,7 @@ Ignore changes in the amount of white space. @itemx --ignore-blank-lines Ignore changes whose lines are all blank. -@itemx -c +@item -c Use the context output format. @item -C @var{n} @@ -489,10 +536,11 @@ Ignore case differences in file contents. Force the compressed formats given. Any of @var{format1} or @var{format2} may be omitted and the corresponding format will be automatically detected. Valid values for @var{format} are @samp{bz2}, @samp{gz}, @samp{lz}, -@samp{xz}, and @samp{zst}. 
If at least one format is specified with this -option, the file is passed to the corresponding decompressor without -verifying its format, and the exact file names of both @var{file1} and -@var{file2} must be given. Other names won't be tried. +@samp{xz}, @samp{zst}, and @samp{un} for @samp{uncompressed}. If at least +one format is specified with this option, the file is passed to the +corresponding decompressor (or transmitted unmodified) without verifying its +format, and the exact file names of both @var{file1} and @var{file2} must be +given. Other names won't be tried. @item -p @itemx --show-c-function @@ -523,8 +571,8 @@ Same as -u but use @var{n} lines of context. @item -v @itemx --verbose -When specified before @samp{--version}, print the version of the diff -program used. +When specified before @option{--version}, print the version of the diff +program used. Further -v's increase the verbosity level. @xref{version}. @item -w @itemx --ignore-all-space @@ -546,12 +594,12 @@ Use the side by side output format. @chapter Zgrep @cindex zgrep -zgrep is a front end to the program grep that allows transparent search -on any combination of compressed and uncompressed files. If any file -given is compressed, its decompressed content is used. If a file given -does not exist, and its name does not end with one of the known -extensions, zgrep tries the compressed file names corresponding to the -formats supported. If a file fails to decompress, zgrep continues +@command{zgrep} is a front end to the program grep that allows transparent +search on any combination of compressed and uncompressed files. If any file +given is compressed, its decompressed content is used. If a file given does +not exist, and its name does not end with one of the known extensions, +@command{zgrep} tries the compressed file names corresponding to the formats +supported. If a file fails to decompress, @command{zgrep} continues searching the rest of the files. 
If a file is specified as @samp{-}, data are read from standard input, @@ -562,7 +610,14 @@ compressed format. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches read standard input. -The format for running zgrep is: +For efficiency reasons, @command{zgrep} does not always read all its input. +For example, the shell command @w{@samp{base64 -d foo | zgrep -q X}} can +cause @command{zgrep} to exit immediately after reading a line containing +@samp{X}, without bothering to read the rest of its input data. This in turn +can cause base64 to exit with a nonzero status because base64 cannot write +to its output pipe after @command{zgrep} exits. + +The format for running @command{zgrep} is: @example zgrep [@var{options}] @var{pattern} [@var{files}] @@ -572,9 +627,9 @@ zgrep [@var{options}] @var{pattern} [@var{files}] An exit status of 0 means at least one match was found, 1 means no matches were found, and 2 means trouble. -zgrep supports the following options (Some options only work if the grep -program used supports them. Options -h, -H, -r, -R, and -Z are managed by -zgrep and not passed to grep): +@command{zgrep} supports the following options (Some options only work if +the grep program used supports them. Options -h, -H, -r, -R, and -Z are +managed by @command{zgrep} and not passed to grep): @table @code @item -a @@ -616,9 +671,9 @@ Interpret @var{pattern} as an extended regular expression (ERE). 
@item -f @var{file} @itemx --file=@var{file} Obtain patterns from @var{file}, one per line.@* -When searching in several files at once, command substitution can be -used with @samp{-e} to read @var{file} only once, for example if -@var{file} is not a regular file: +When searching in several files at once, command substitution can be used +with @option{-e} to read @var{file} only once, for example if @var{file} is +not a regular file: @w{@samp{zgrep -e "$(cat @var{file})" file1.lz file2.gz}} @item -F @@ -648,11 +703,13 @@ Ignore binary files. @item -l @itemx --files-with-matches -Only print names of files containing at least one match. +Only print names of files containing at least one match. Stop reading each +file on the first match. @item -L @itemx --files-without-match -Only print names of files not containing any matches.@* +Only print names of files not containing any matches. Stop reading each file +on the first match.@* Note: option -L fails (prints wrong results, returns wrong status, and even hangs) when using GNU grep versions 3.2 to 3.4 inclusive because of a wrong change in the exit status of grep, which was reverted in GNU grep 3.5. @@ -679,10 +736,11 @@ Show only the part of matching lines that actually matches @var{pattern}. @item -O @var{format} @itemx --force-format=@var{format} Force the compressed format given. Valid values for @var{format} are -@samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, and @samp{zst}. If this option -is used, the files are passed to the corresponding decompressor without -verifying their format, and the exact file name must be given. Other names -won't be tried. +@samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, @samp{zst}, and @samp{un} for +@samp{uncompressed}. If this option is used, the files are passed to the +corresponding decompressor (or transmitted unmodified) without verifying +their format, and the exact file name must be given. Other names won't be +tried. 
@item -P @itemx --perl-regexp @@ -724,8 +782,9 @@ Use binary I/O on platforms affected by the bug known as "text mode I/O". Select non-matching lines. @item --verbose -Verbose mode. Show error messages. When specified before @samp{--version}, -print the version of the grep program used. +Verbose mode. Show error messages. When specified before @option{--version}, +print the version of the grep program used. Repeating it increases the +verbosity level. @xref{version}. @item -w @itemx --word-regexp @@ -738,10 +797,10 @@ Match only whole lines. @item -Z @itemx --null Output a zero byte (the ASCII NUL character) instead of the character that -normally follows a file name. For example, 'zgrep -lZ' outputs a zero byte -after each file name instead of the usual newline. This option makes the -output unambiguous, even in the presence of file names containing unusual -characters like newlines. +normally follows a file name. For example, @w{@samp{zgrep -lZ}} outputs a +zero byte after each file name instead of the usual newline. This option +makes the output unambiguous, even in the presence of file names containing +unusual characters like newlines. @end table @@ -750,14 +809,16 @@ characters like newlines. @chapter Ztest @cindex ztest -ztest verifies the integrity of the compressed files specified. -Uncompressed files are ignored. If a file is specified as @samp{-}, the -integrity of compressed data read from standard input is verified. Data -read from standard input must be all in the same compressed format. If -a file fails to decompress, does not exist, can't be opened, or is a -terminal, ztest continues verifying the rest of the files. A final -diagnostic is shown at verbosity level 1 or higher if any file fails the -test when testing multiple files. +@command{ztest} verifies the integrity of the compressed files specified. It +also warns if an uncompressed file has a compressed file name extension, or +if a compressed file has a wrong compressed extension. 
Uncompressed files +are otherwise ignored. If a file is specified as @samp{-}, the integrity of +compressed data read from standard input is verified. Data read from +standard input must be all in the same compressed format. If a file fails to +decompress, does not exist, can't be opened, or is a terminal, @command{ztest} +continues verifying the rest of the files. A final diagnostic is shown at +verbosity level 1 or higher if any file fails the test when testing multiple +files. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches read standard input. @@ -776,18 +837,19 @@ warning. Therefore, xz files can't always be verified as reliably as files in the other formats can. @c We can only hope that xz is soon abandoned. -The format for running ztest is: +The format for running @command{ztest} is: @example ztest [@var{options}] [@var{files}] @end example @noindent -The exit status is 0 if all compressed files verify OK, 1 if -environmental problems (file not found, invalid flags, I/O errors, etc), -2 if any compressed file is corrupt or invalid. +Exit status is 0 if all compressed files verify OK, 1 if environmental +problems (file not found, invalid command line options, I/O errors, etc), +2 if any compressed file is corrupt or invalid, or if any file has an +incorrect file name extension. -ztest supports the following options: +@command{ztest} supports the following options: @table @code @item -O @var{format} @@ -815,8 +877,8 @@ recursively, following all symbolic links. @item -v @itemx --verbose -Verbose mode. Show the verify status for each file processed.@* -Further -v's increase the verbosity level. +Verbose mode. Show the verify status for each file processed. Further -v's +increase the verbosity level. @xref{version}. @end table @@ -825,30 +887,30 @@ Further -v's increase the verbosity level. 
@chapter Zupdate @cindex zupdate -zupdate recompresses files from bzip2, gzip, xz, and zstd formats to lzip -format. Each original is compared with the new file and then deleted. Only -regular files with standard file name extensions are recompressed, other -files are ignored. Compressed files are decompressed and then recompressed -on the fly; no temporary files are created. If an error happens while -recompressing a file, zupdate exits immediately without recompressing the -rest of the files. The lzip format is chosen as destination because it is -the most appropriate for long-term data archiving. +@command{zupdate} recompresses files from bzip2, gzip, xz, and zstd formats +to lzip format. Each original is compared with the new file and then +deleted. Only regular files with standard file name extensions are +recompressed, other files are ignored. Compressed files are decompressed and +then recompressed on the fly; no temporary files are created. If an error +happens while recompressing a file, @command{zupdate} exits immediately +without recompressing the rest of the files. The lzip format is chosen as +destination because it is the most appropriate for long-term data archiving. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches do nothing. -If the lzip compressed version of a file already exists, the file is -skipped unless the option @samp{--force} is given. In this case, if the -comparison with the existing lzip version fails, an error is returned -and the original file is not deleted. The operation of zupdate is meant -to be safe and not cause any data loss. Therefore, existing lzip -compressed files are never overwritten nor deleted. +If the lzip compressed version of a file already exists, the file is skipped +unless the option @option{--force} is given. In this case, if the comparison +with the existing lzip version fails, an error is returned and the original +file is not deleted. 
The operation of @command{zupdate} is meant to be safe +and not cause any data loss. Therefore, existing lzip compressed files are +never overwritten nor deleted. Recompressing files from a read-only file system to another place can be done by first linking the files from the destination directory and then compressing the links: @w{@samp{ln -s /src/foo.gz . && zupdate foo.gz}} -Combining the options @samp{--force} and @samp{--keep}, as in +Combining the options @option{--force} and @option{--keep}, as in @w{@samp{zupdate -f -k *.gz}}, verifies that there are no differences between each pair of files in a multiformat set of files. @@ -857,20 +919,20 @@ The names of the original files must have one of the following extensions:@* recompressed to @samp{.lz};@* @samp{.tbz}, @samp{.tbz2}, @samp{.tgz}, @samp{.txz}, or @samp{.tzst}, which are recompressed to @samp{.tlz}.@* -Keeping the combined extensions (@samp{.tgz} --> @samp{.tlz}) may be useful -when recompressing Slackware packages, for example. +Keeping the combined extensions @w{(@samp{.tgz} --> @samp{.tlz})} may be +useful when recompressing Slackware packages, for example. Bzip2, gzip, and lzip are the primary formats. Xz and zstd are optional. If the decompressor for the xz or zstd formats is not found, the corresponding files are ignored. -Recompressing a file is much like copying or moving it. Therefore zupdate -preserves the access and modification dates, permissions, and, if you have -appropriate privileges, ownership of the file just as @w{@samp{cp -p}} does. -(If the user ID or the group ID can't be duplicated, the file permission -bits S_ISUID and S_ISGID are cleared). +Recompressing a file is much like copying or moving it. Therefore +@command{zupdate} preserves the access and modification dates, permissions, +and, if you have appropriate privileges, ownership of the file just as +@w{@samp{cp -p}} does. 
(If the user ID or the group ID can't be duplicated, +the file permission bits S_ISUID and S_ISGID are cleared). -The format for running zupdate is: +The format for running @command{zupdate} is: @example zupdate [@var{options}] [@var{files}] @@ -880,12 +942,29 @@ zupdate [@var{options}] [@var{files}] Exit status is 0 if all the compressed files were successfully recompressed (if needed), compared, and deleted (if requested). 1 if a non-fatal error occurred (file not found or not regular, or has invalid format, or can't be -deleted). 2 if a fatal error occurred (compressor can't be run, or -comparison fails). +deleted). 2 if a fatal error occurred (invalid command line options, +compressor can't be run, or comparison fails). -zupdate supports the following options: +@command{zupdate} supports the following options: @table @code +@item -d @var{dir} +@itemx --destdir=@var{dir} +Write recompressed files to another directory, using @var{dir} as base +directory, instead of writing them in the same directory as the original +files. In recursive mode, this is done by replacing each directory specified +in the command line with @var{dir} to produce the recompressed file names. +For example, @w{@samp{zupdate -r -d @var{dir} ../a}} recompresses a file +named @file{../a/b/c.gz} to @file{@var{dir}/b/c.lz}. Regular files specified +in the command line are recompressed directly into @var{dir}. For example, +@w{@samp{zupdate -d @var{dir} ../a/b/c.gz}} writes the recompressed file to +@file{@var{dir}/c.lz}. + +This option allows recompressing files from a read-only file system to +another place without the need to copy or link them to the destination +directory first. (Remember to use option @option{--keep} when recompressing +read-only files to avoid warnings about files that can't be deleted). 
+ @item -e @itemx --expand-extensions Expand combined file name extensions; recompress @samp{.tbz}, @samp{.tbz2}, @@ -894,7 +973,7 @@ Expand combined file name extensions; recompress @samp{.tbz}, @samp{.tbz2}, @item -f @itemx --force Don't skip a file for which a lzip compressed version already exists. -@samp{--force} compares the content of the input file with the content +@option{--force} compares the content of the input file with the content of the existing lzip file and deletes the input file if both contents are identical. @@ -908,10 +987,10 @@ Keep (don't delete) the input file after comparing it with the lzip file. @item -l @itemx --lzip-verbose -Pass one option @samp{-v} to the lzip compressor so that it shows the +Pass one option @option{-v} to the lzip compressor so that it shows the compression ratio for each file processed. Using lzip 1.15 or newer, a -second @samp{-l} shows the progress of compression. Use it together with -@samp{-v} to see the name of the file. +second @option{-l} shows the progress of compression. Use it together with +@option{-v} to see the name of the file. @item -q @itemx --quiet @@ -930,13 +1009,30 @@ recursively, following all symbolic links. @item -v @itemx --verbose -Verbose mode. Show the files being processed. A second @samp{-v} also -shows the files being ignored. +Verbose mode. Show the files being processed. A second @option{-v} also shows +the files being ignored and increases the verbosity level. @xref{version}. @item -0 .. -9 -Set the compression level of lzip. By default zupdate passes @samp{-9} to -lzip. Custom compression options can be passed to lzip with the option -@samp{--lz}. For example @w{@samp{--lz='lzip -9 -s64MiB'}}. +Set the compression level of lzip. By default @command{zupdate} passes +@option{-9} to lzip. Custom compression options can be passed to lzip with +the option @option{--lz}. For example @w{@option{--lz='lzip -9 -s64MiB'}}. 
+ +@anchor{lz-compressor} +@item --lz=@var{command} +Set compression command. @var{command} may include arguments. For example +@w{@option{--lz='plzip --threads=2'}}. The name of the program can't begin +with @samp{-}. This option overrides the value set in @file{zutils.conf}. +The compression program used does not need to implement decompression +(@pxref{compressor-requirements}), but it must implement at least the +compression level option @option{-9} and the option @w{@option{-o @var{file}}} +to write the compressed output to @var{file}. +@uref{http://www.nongnu.org/lzip/manual/tarlz_manual.html,,tarlz} meets +these requirements, and therefore can be used to recompress POSIX tar +archives by using a command like +@w{@samp{zupdate --lz='tarlz -9 -z --no-solid' archive.tar.gz}}. +@ifnothtml +@xref{Top,tarlz manual,,tarlz}. +@end ifnothtml @end table @@ -36,13 +36,13 @@ int verbosity = 0; namespace { -const char * const config_file_name = "zutilsrc"; +const char * const config_file_name = "zutils.conf"; const char * const program_year = "2022"; std::string compressor_names[num_formats] = { "bzip2", "gzip", "lzip", "xz", "zstd" }; // default compressor names -// args to compressors read from rc or from options like --lz, maybe empty +// args to compressors read from .conf or from options like --lz, maybe empty std::vector< std::string > compressor_args[num_formats]; // vector of enabled formats plus [num_formats] for uncompressed. @@ -189,7 +189,7 @@ bool parse_rc_line( const std::string & line, } - // Return 0 if success, 1 if file not found, 2 if syntax error. + // Return 0 if success, 1 if file not found, 2 if syntax or I/O error. 
int process_rcfile( const std::string & name ) { FILE * const f = std::fopen( name.c_str(), "r" ); @@ -205,24 +205,42 @@ int process_rcfile( const std::string & name ) if( !parse_rc_line( line, name.c_str(), linenum ) ) { retval = 2; break; } } - std::fclose( f ); + if( std::fclose( f ) != 0 && retval == 0 ) + { show_file_error( name.c_str(), "Error closing config file", errno ); + retval = 2; } return retval; } + +void show_using_version( const char * const command ) + { + FILE * const f = popen( command, "r" ); + if( f ) + { + char command_version[1024] = { 0 }; + const int rd = std::fread( command_version, 1, sizeof command_version, f ); + pclose( f ); + int i = 0; + while( i + 1 < rd && command_version[i] != '\n' ) ++i; + command_version[i] = 0; + if( command_version[0] ) std::printf( "Using %s\n", command_version ); + } + } + } // end namespace bool enabled_format( const int format_index ) { if( enabled_formats.size() <= num_formats ) return true; // all enabled - if( format_index < 0 ) return enabled_formats[num_formats]; // uncompressed + if( format_index < 0 || format_index >= num_formats ) + return enabled_formats[num_formats]; // uncompressed return enabled_formats[format_index]; } void parse_format_list( const std::string & arg, const char * const pn ) { - const std::string un( "uncompressed" ); bool error = arg.empty(); enabled_formats.assign( num_formats + 1, false ); @@ -235,26 +253,25 @@ void parse_format_list( const std::string & arg, const char * const pn ) for( int i = 0; i < num_formats; ++i ) if( s == format_names[i] ) { format_index = i; break; } - if( format_index == num_formats && un.find( s ) != 0 ) + if( format_index == num_formats && s != "un" ) // uncompressed { error = true; break; } enabled_formats[format_index] = true; } if( !error ) return; - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad argument in option '%s'.\n", - program_name, pn ); + show_option_error( arg.c_str(), "Invalid format in", pn ); std::exit( 1 ); } -int 
parse_format_type( const std::string & arg, const char * const pn ) +int parse_format_type( const std::string & arg, const char * const pn, + const bool allow_uncompressed ) { for( int i = 0; i < num_formats; ++i ) if( arg == format_names[i] ) return i; - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad argument in option '%s'.\n", - program_name, pn ); + if( allow_uncompressed && arg == "un" ) return num_formats; + show_option_error( arg.c_str(), ( arg.find( ',' ) < arg.size() ) ? + "Too many formats in" : "Invalid format in", pn ); std::exit( 1 ); } @@ -286,10 +303,11 @@ void maybe_process_config_file( const Arg_parser & parser ) for( int i = 0; i < parser.arguments(); ++i ) if( parser.code( i ) == 'N' ) return; std::string name; - const char * p = std::getenv( "HOME" ); if( p ) name = p; + const char * p = std::getenv( "XDG_CONFIG_HOME" ); if( p ) name = p; + else { p = std::getenv( "HOME" ); if( p ) { name = p; name += "/.config"; } } if( name.size() ) { - name += "/."; name += config_file_name; + name += '/'; name += config_file_name; const int retval = process_rcfile( name ); if( retval == 0 ) return; if( retval == 2 ) std::exit( 2 ); @@ -300,11 +318,12 @@ void maybe_process_config_file( const Arg_parser & parser ) } -void parse_compressor( const std::string & arg, const int format_index, - const int eretval ) +void parse_compressor( const std::string & arg, const char * const pn, + const int format_index, const int eretval ) { if( !parse_compressor_command( arg, 0, format_index ) ) - { show_error( "Missing compressor name." 
); std::exit( eretval ); } + { show_option_error( arg.c_str(), "Invalid compressor command in", pn ); + std::exit( eretval ); } } @@ -313,7 +332,7 @@ const char * get_compressor_name( const int format_index ) if( format_index >= 0 && format_index < num_formats && compressor_names[format_index].size() ) return compressor_names[format_index].c_str(); - return 0; + return 0; // uncompressed/unknown } @@ -334,20 +353,16 @@ void show_version( const char * const command ) { std::printf( "%s (zutils) %s\n", program_name, PROGVERSION ); std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); - if( command && verbosity >= 1 ) - { - FILE * const f = popen( command, "r" ); - if( f ) + if( command && verbosity >= 1 ) show_using_version( command ); + if( verbosity >= 1 + ( command != 0 ) ) + for( int format_index = 0; format_index < num_formats; ++format_index ) { - char command_version[1024] = { 0 }; - const int rd = std::fread( command_version, 1, sizeof command_version, f ); - pclose( f ); - int i = 0; - while( i + 1 < rd && command_version[i] != '\n' ) ++i; - command_version[i] = 0; - if( command_version[0] ) std::printf( "Using %s\n", command_version ); + if( !enabled_format( format_index ) ) continue; + std::string compressor_command( compressor_names[format_index] ); + if( compressor_command.empty() ) continue; + compressor_command += " -V 2> /dev/null"; + show_using_version( compressor_command.c_str() ); } - } std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" "This is free software: you are free to change and redistribute it.\n" "There is NO WARRANTY, to the extent permitted by law.\n" ); @@ -385,6 +400,15 @@ void internal_error( const char * const msg ) } +void show_option_error( const char * const arg, const char * const msg, + const char * const option_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: '%s': %s option '%s'.\n", + program_name, arg, msg, option_name ); + } + + void 
show_close_error( const char * const prog_name ) { if( verbosity >= 0 ) @@ -15,7 +15,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -enum { fmt_bz2, fmt_gz, fmt_lz, fmt_xz, fmt_zst, num_formats }; // format_index +// format_index; < 0 means undefined, >= num_formats means uncompressed +enum { fmt_bz2, fmt_gz, fmt_lz, fmt_xz, fmt_zst, num_formats }; + const char * const format_names[num_formats] = { "bz2", "gz", "lz", "xz", "zst" }; const char * const simple_extensions[num_formats] = @@ -23,15 +25,22 @@ const char * const simple_extensions[num_formats] = const int format_order[num_formats] = { fmt_lz, fmt_bz2, fmt_gz, fmt_zst, fmt_xz }; // search order -bool enabled_format( const int format_index ); +bool enabled_format( const int format_index ); // -1 == uncompressed void parse_format_list( const std::string & arg, const char * const pn ); -int parse_format_type( const std::string & arg, const char * const pn ); +// Return num_formats if arg == "un" (uncompressed). +int parse_format_type( const std::string & arg, const char * const pn, + const bool allow_uncompressed = true ); int extension_index( const std::string & name ); // -1 if unknown int extension_format( const int eindex ); // -1 if uncompressed const char * extension_from( const int eindex ); const char * extension_to( const int eindex ); +// Return format_index, or -1 if uncompressed. 
+// +inline int test_extension( const std::string & name ) + { return extension_format( extension_index( name ) ); } + extern const char * invocation_name; extern const char * program_name; extern int verbosity; @@ -40,8 +49,8 @@ class Arg_parser; void maybe_process_config_file( const Arg_parser & parser ); -void parse_compressor( const std::string & arg, const int format_index, - const int eretval = 2 ); +void parse_compressor( const std::string & arg, const char * const pn, + const int format_index, const int eretval = 2 ); const char * get_compressor_name( const int format_index ); const std::vector< std::string > & get_compressor_args( const int format_index ); @@ -53,6 +62,8 @@ void show_error( const char * const msg, const int errcode = 0, void show_file_error( const char * const filename, const char * const msg, const int errcode = 0 ); void internal_error( const char * const msg ); +void show_option_error( const char * const arg, const char * const msg, + const char * const option_name ); void show_close_error( const char * const prog_name = "data feeder" ); void show_exec_error( const char * const prog_name ); void show_fork_error( const char * const prog_name ); diff --git a/recursive.cc b/recursive.cc index c5f57bd..a0cdaa4 100644 --- a/recursive.cc +++ b/recursive.cc @@ -79,7 +79,7 @@ bool next_filename( std::list< std::string > & filenames, i > 1 && input_filename[i-1] == '/'; --i ) input_filename.resize( i - 1 ); // remove trailing slashes struct stat stdot, *stdotp = 0; - if( input_filename[0] != '/' ) // relative path + if( input_filename[0] != '/' ) // relative file name { if( input_filename == "." 
) input_filename.clear(); if( stat( ".", &stdot ) == 0 && S_ISDIR( stdot.st_mode ) ) diff --git a/testsuite/check.sh b/testsuite/check.sh index 5ad9d5b..6d79f20 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -47,8 +47,8 @@ done cat "${testdir}"/test.txt > in || framework_failure cat "${testdir}"/test.txt.tar > in.tar || framework_failure -printf "01234567890" > pin.tar || framework_failure -cat in.tar in.tar in.tar in.tar >> pin.tar || framework_failure +printf "01234567890" > pin.tar4 || framework_failure +cat in.tar in.tar in.tar in.tar >> pin.tar4 || framework_failure cat in > -in- || framework_failure cat in.lz > -in-.lz || framework_failure cat in.lz > lz_only.lz || framework_failure @@ -92,6 +92,8 @@ cmp in copy || test_failed $LINENO cmp in copy || test_failed $LINENO "${ZCAT}" -N -O lz - - < in.lz > copy || test_failed $LINENO cmp in copy || test_failed $LINENO +"${ZCAT}" -N -O un in.lz | lzip -d > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO "${ZCAT}" -N --lz='lzip -q' < in.lz > copy || test_failed $LINENO cmp in copy || test_failed $LINENO "${ZCAT}" -N in > copy || test_failed $LINENO @@ -151,28 +153,31 @@ for i in ${extensions}; do "${ZCMP}" -N empty empty.$i || test_failed $LINENO $i done -"${ZCMP}" -Nq in in6 +"${ZCMP}" -N -q in in6 [ $? = 1 ] || test_failed $LINENO "${ZCMP}" -N -n 0 in in6 || test_failed $LINENO "${ZCMP}" -N -n 100B in in6 || test_failed $LINENO "${ZCMP}" -N -n 1k in in6 || test_failed $LINENO "${ZCMP}" -N -n 10kB in in6 || test_failed $LINENO -"${ZCMP}" -Nq in.tar pin.tar +"${ZCMP}" -N -n 01750 in in6 || test_failed $LINENO +"${ZCMP}" -N -n 0x3E8 in in6 || test_failed $LINENO +"${ZCMP}" -N -s in.tar pin.tar4 [ $? = 1 ] || test_failed $LINENO -"${ZCMP}" -Nq -i 0B:11B in.tar pin.tar +"${ZCMP}" -N -q -i 0B:11B in.tar pin.tar4 [ $? 
= 1 ] || test_failed $LINENO -"${ZCMP}" -N -i 0:11 -n 0 in.tar pin.tar || test_failed $LINENO -"${ZCMP}" -N -i 0:11 -n 100 in.tar pin.tar || test_failed $LINENO -"${ZCMP}" -N -i 0:11 -n 1Ki in.tar pin.tar || test_failed $LINENO -"${ZCMP}" -N -i 0:11 -n 10KiB in.tar pin.tar || test_failed $LINENO +"${ZCMP}" -N -i 0:11 -n 0 in.tar pin.tar4 || test_failed $LINENO +"${ZCMP}" -N -i 0:11 -n 100 in.tar pin.tar4 || test_failed $LINENO +"${ZCMP}" -N -i 0:013 -n 1Ki in.tar pin.tar4 || test_failed $LINENO +"${ZCMP}" -N -i 0:0xB -n 10KiB in.tar pin.tar4 || test_failed $LINENO "${ZCMP}" -N - - || test_failed $LINENO -"${ZCMP}" -Nq - +"${ZCMP}" -N -q - [ $? = 2 ] || test_failed $LINENO "${ZCMP}" -N in in || test_failed $LINENO "${ZCMP}" -N in || test_failed $LINENO "${ZCMP}" -N --format=gz,bz2 in || test_failed $LINENO "${ZCMP}" -N --format=gz in || test_failed $LINENO "${ZCMP}" -N in.lz in.gz || test_failed $LINENO +cat in.lz | "${ZCMP}" -N -O un,un in.lz - || test_failed $LINENO "${ZCMP}" -N --lz='lzip -q' in.lz in.gz || test_failed $LINENO "${ZCMP}" -N in.gz -- -in-.lz || test_failed $LINENO "${ZCMP}" -N -- -in-.lz in.gz || test_failed $LINENO @@ -187,25 +192,25 @@ done "${ZCMP}" -N in - < in.lz || test_failed $LINENO "${ZCMP}" -N - in < in.lz || test_failed $LINENO "${ZCMP}" -N lz_only.lz - < in || test_failed $LINENO -"${ZCMP}" -Nq lz_only.lz +"${ZCMP}" -N -q lz_only.lz [ $? = 2 ] || test_failed $LINENO -"${ZCMP}" -Nq "" in +"${ZCMP}" -N -q "" in [ $? = 2 ] || test_failed $LINENO -"${ZCMP}" -Nq --force-format=lz in.lz +"${ZCMP}" -N -q --force-format=lz in.lz [ $? = 2 ] || test_failed $LINENO -"${ZCMP}" -Nq --force-format=lz in.gz in.lz +"${ZCMP}" -N -q --force-format=lz in.gz in.lz [ $? = 2 ] || test_failed $LINENO -"${ZCMP}" -Nq -i 100BB in in +"${ZCMP}" -N -q -i 100BB in in [ $? = 2 ] || test_failed $LINENO -"${ZCMP}" -Nq -i 100BB:100 in in +"${ZCMP}" -N -q -i 100BB:100 in in [ $? 
= 2 ] || test_failed $LINENO -"${ZCMP}" -Nq -i 100: in in +"${ZCMP}" -N -q -i 100: in in [ $? = 2 ] || test_failed $LINENO -"${ZCMP}" -Nq -n -1 in in +"${ZCMP}" -N -q -n -1 in in [ $? = 2 ] || test_failed $LINENO "${ZCMP}" -N -q -n 100BB in in [ $? = 2 ] || test_failed $LINENO -"${ZCMP}" -Nq --gz=bad-gzip in.gz in.lz +"${ZCMP}" -N -q --gz=bad-gzip in.gz in.lz [ $? = 2 ] || test_failed $LINENO "${ZCMP}" -N --bad-option in in 2> /dev/null [ $? = 2 ] || test_failed $LINENO @@ -227,7 +232,7 @@ done "${ZDIFF}" -N in in6 > /dev/null [ $? = 1 ] || test_failed $LINENO # GNU diff 3.0 returns 2 when binary files differ -"${ZDIFF}" -N in.tar pin.tar > /dev/null && test_failed $LINENO +"${ZDIFF}" -N in.tar pin.tar4 > /dev/null && test_failed $LINENO "${ZDIFF}" -N - - || test_failed $LINENO "${ZDIFF}" -N - 2> /dev/null [ $? = 2 ] || test_failed $LINENO @@ -236,6 +241,7 @@ done "${ZDIFF}" -N --format=gz,bz2 in || test_failed $LINENO "${ZDIFF}" -N --format=gz in || test_failed $LINENO "${ZDIFF}" -N in.lz in.gz > /dev/null || test_failed $LINENO +cat in.gz | "${ZDIFF}" -N -O un,un - in.gz || test_failed $LINENO "${ZDIFF}" -N --lz='lzip -q' in.lz in.gz > /dev/null || test_failed $LINENO "${ZDIFF}" -N in.gz -- -in-.lz > /dev/null || test_failed $LINENO "${ZDIFF}" -N -- -in-.lz in.gz > /dev/null || test_failed $LINENO @@ -303,8 +309,8 @@ for i in ${extensions}; do "${ZGREP}" -N "nx_pattern" empty.$i && test_failed $LINENO $i done -"${ZGREP}" -N pin.tar -e "GNU" > /dev/null || test_failed $LINENO -"${ZGREP}" -N "GNU" < pin.tar > /dev/null || test_failed $LINENO +"${ZGREP}" -N pin.tar4 -e "GNU" > /dev/null || test_failed $LINENO +"${ZGREP}" -N "GNU" < pin.tar4 > /dev/null || test_failed $LINENO "${ZGREP}" -N -r "GNU" . > /dev/null || test_failed $LINENO "${ZGREP}" -N -r "GNU" > /dev/null || test_failed $LINENO "${ZGREP}" -N -R "GNU" . 
> /dev/null || test_failed $LINENO @@ -313,6 +319,7 @@ done "${ZGREP}" -N -e "GNU" in > /dev/null || test_failed $LINENO "${ZGREP}" -N "GNU" < in > /dev/null || test_failed $LINENO "${ZGREP}" -N -O lz "nx_pattern" - - < in.lz > /dev/null && test_failed $LINENO +"${ZGREP}" -N -O un "LZIP" in.lz > /dev/null || test_failed $LINENO "${ZGREP}" -N -e "-free" --lz='lzip -q' < in.lz > /dev/null || test_failed $LINENO "${ZGREP}" -N -- "-free" -in- > /dev/null || test_failed $LINENO @@ -329,7 +336,7 @@ done "${ZGREP}" -N -L "nx_pattern" in in.gz in.bz2 in.lz -- -in- > /dev/null && test_failed $LINENO "${ZGREP}" -Nq -l "01234567890" in "${bad1_lz}" in.lz && test_failed $LINENO -"${ZGREP}" -Nq -l "01234567890" in "${bad1_lz}" in.lz pin.tar > /dev/null || +"${ZGREP}" -Nq -l "01234567890" in "${bad1_lz}" in.lz pin.tar4 > /dev/null || test_failed $LINENO "${ZGREP}" -N "GNU" . @@ -355,8 +362,11 @@ for i in ${extensions}; do [ $? = 2 ] || test_failed $LINENO $i "${ZTEST}" -N --force-format=$i in 2> /dev/null [ $? = 2 ] || test_failed $LINENO $i + "${ZTEST}" -N empty.$i 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i done +rm -f empty.bz2 empty.gz empty.lz || framework_failure "${ZTEST}" -N in in.gz in.bz2 in.lz -- -in- || test_failed $LINENO "${ZTEST}" -N < in.gz || test_failed $LINENO "${ZTEST}" -N < in.bz2 || test_failed $LINENO @@ -368,7 +378,23 @@ done "${ZTEST}" -N -r || test_failed $LINENO "${ZTEST}" -N -R . || test_failed $LINENO "${ZTEST}" -N -R || test_failed $LINENO -"${ZTEST}" -N empty empty.bz2 empty.gz empty.lz || test_failed $LINENO +"${ZTEST}" -N empty || test_failed $LINENO +rm -f empty || framework_failure + +# test wrong compressed extensions +cat in.bz2 > in_bz2.gz || framework_failure +cat in.gz > in_gz.lz || framework_failure +cat in.lz > in_lz.bz2 || framework_failure +cat in > in_un.lz || framework_failure +"${ZTEST}" -Nq in_bz2.gz +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq in_gz.lz +[ $? 
= 2 ] || test_failed $LINENO +"${ZTEST}" -Nq in_lz.bz2 +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq in_un.lz +[ $? = 2 ] || test_failed $LINENO +rm -f in_bz2.gz in_gz.lz in_lz.bz2 in_un.lz || framework_failure "${ZTEST}" -Nq in.gz "${bad0_lz}" in.bz2 "${bad1_lz}" in.lz [ $? = 2 ] || test_failed $LINENO @@ -378,12 +404,16 @@ lines=`"${ZTEST}" -Nv in.gz "${bad0_lz}" in.bz2 "${bad1_lz}" in.lz 2>&1 | wc -l` [ "${lines}" -eq 6 ] || test_failed $LINENO "${lines}" "${ZTEST}" -Nq < in [ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq --force-format=un < in.gz +[ $? = 1 ] || test_failed $LINENO "${ZTEST}" -Nq "" < in.lz [ $? = 1 ] || test_failed $LINENO dd if=in.lz bs=1000 count=1 2> /dev/null | "${ZTEST}" -N -q [ $? = 2 ] || test_failed $LINENO "${ZTEST}" -Nq --force-format=lz in.bz2 [ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq --force-format=un in.gz +[ $? = 1 ] || test_failed $LINENO "${ZTEST}" -N --lz='lzip --bad-option' in.lz 2> /dev/null [ $? = 1 ] || test_failed $LINENO "${ZTEST}" -N --bad-option 2> /dev/null @@ -410,20 +440,30 @@ cat in.gz > a.gz || framework_failure "${ZUPDATE}" -N --bad-option 2> /dev/null [ $? = 2 ] || test_failed $LINENO +if /bin/sh -c "tarlz -V" > /dev/null 2>&1; then + printf . + gzip < in.tar > in.tar.gz || framework_failure + "${ZUPDATE}" -N -k --lz='tarlz -0 -z --no-solid' in.tar.gz || + test_failed $LINENO + [ -e in.tar ] || test_failed $LINENO + "${ZCMP}" -N in.tar.gz in.tar.lz || test_failed $LINENO + rm -f in.tar.gz in.tar.lz || framework_failure +fi + cat in.lz in.lz > a.lz || framework_failure -"${ZUPDATE}" -Nq -f a.bz2 a.gz +"${ZUPDATE}" -N -q -f a.bz2 a.gz [ $? = 2 ] || test_failed $LINENO [ -e a.bz2 ] || test_failed $LINENO [ -e a.gz ] || test_failed $LINENO [ -e a.lz ] || test_failed $LINENO rm -f a.lz || framework_failure -"${ZUPDATE}" -N a.bz2 || test_failed $LINENO +"${ZUPDATE}" -N -0 a.bz2 || test_failed $LINENO [ ! 
-e a.bz2 ] || test_failed $LINENO [ -e a.gz ] || test_failed $LINENO [ -e a.lz ] || test_failed $LINENO rm -f a.lz || framework_failure -"${ZUPDATE}" -N a.gz || test_failed $LINENO +"${ZUPDATE}" -N -0 a.gz || test_failed $LINENO [ ! -e a.bz2 ] || test_failed $LINENO [ ! -e a.gz ] || test_failed $LINENO [ -e a.lz ] || test_failed $LINENO @@ -431,7 +471,7 @@ rm -f a.lz || framework_failure cat in.bz2 > a.bz2 || framework_failure cat in.gz > a.gz || framework_failure -"${ZUPDATE}" -Nq a.bz2 a.gz +"${ZUPDATE}" -N -q -0 a.bz2 a.gz [ $? = 1 ] || test_failed $LINENO [ ! -e a.bz2 ] || test_failed $LINENO [ -e a.gz ] || test_failed $LINENO @@ -440,7 +480,7 @@ rm -f a.lz || framework_failure cat in.bz2 > a.bz2 || framework_failure cat in.gz > a.gz || framework_failure -"${ZUPDATE}" -N -f -k a.bz2 a.gz || test_failed $LINENO +"${ZUPDATE}" -N -0 -f -k a.bz2 a.gz || test_failed $LINENO [ -e a.bz2 ] || test_failed $LINENO [ -e a.gz ] || test_failed $LINENO [ -e a.lz ] || test_failed $LINENO @@ -448,7 +488,7 @@ rm -f a.lz || framework_failure cat in.bz2 > a.bz2 || framework_failure cat in.gz > a.gz || framework_failure -"${ZUPDATE}" -N -f a.bz2 a.gz || test_failed $LINENO +"${ZUPDATE}" -N -0 -f a.bz2 a.gz || test_failed $LINENO [ ! -e a.bz2 ] || test_failed $LINENO [ ! -e a.gz ] || test_failed $LINENO [ ! -e a ] || test_failed $LINENO @@ -458,7 +498,7 @@ rm -f a.lz || framework_failure cat in.bz2 > a.tbz || framework_failure # keep combined extensions cat in.bz2 > b.tbz2 || framework_failure cat in.gz > c.tgz || framework_failure -"${ZUPDATE}" -N a.tbz b.tbz2 c.tgz || test_failed $LINENO +"${ZUPDATE}" -N -0 a.tbz b.tbz2 c.tgz || test_failed $LINENO [ ! -e a.tbz ] || test_failed $LINENO [ ! -e b.tbz2 ] || test_failed $LINENO [ ! 
-e c.tgz ] || test_failed $LINENO @@ -476,7 +516,7 @@ rm -f a.tlz b.tlz c.tlz || framework_failure cat in.bz2 > a.tbz || framework_failure # expand combined extensions cat in.bz2 > b.tbz2 || framework_failure cat in.gz > c.tgz || framework_failure -"${ZUPDATE}" -N -e a.tbz b.tbz2 c.tgz || test_failed $LINENO +"${ZUPDATE}" -N -0 -e a.tbz b.tbz2 c.tgz || test_failed $LINENO [ ! -e a.tbz ] || test_failed $LINENO [ ! -e b.tbz2 ] || test_failed $LINENO [ ! -e c.tgz ] || test_failed $LINENO @@ -495,7 +535,7 @@ rm -f a.tar.lz b.tar.lz c.tar.lz || framework_failure cat in.bz2 > a.bz2 || framework_failure cat "${bad0_gz}" > b.gz || framework_failure cat in.gz > c.gz || framework_failure -"${ZUPDATE}" -N -f a.bz2 b.gz c.gz 2> /dev/null +"${ZUPDATE}" -N -0 -f a.bz2 b.gz c.gz 2> /dev/null [ $? = 1 ] || test_failed $LINENO [ ! -e a.bz2 ] || test_failed $LINENO [ -e b.gz ] || test_failed $LINENO @@ -508,7 +548,7 @@ cat in.gz > c.gz || framework_failure cat in.bz2 > a.bz2 || framework_failure cat "${bad0_gz}" > b.gz || framework_failure cat in.gz > c.gz || framework_failure -"${ZUPDATE}" -N -f -i a.bz2 b.gz c.gz 2> /dev/null +"${ZUPDATE}" -N -0 -f -i a.bz2 b.gz c.gz 2> /dev/null [ $? = 1 ] || test_failed $LINENO [ ! -e a.bz2 ] || test_failed $LINENO [ -e b.gz ] || test_failed $LINENO @@ -517,13 +557,13 @@ cat in.gz > c.gz || framework_failure rm -f a.lz b.gz c.gz || framework_failure cat in.bz2 > a.bz2 || framework_failure -"${ZUPDATE}" -N -1 -q a.bz2 || test_failed $LINENO +"${ZUPDATE}" -N -0 -q a.bz2 || test_failed $LINENO [ ! -e a.bz2 ] || test_failed $LINENO [ -e a.lz ] || test_failed $LINENO rm -f a.lz || framework_failure cat in.gz > 'name with spaces.gz' || framework_failure -"${ZUPDATE}" -N -1 -q 'name with spaces.gz' || test_failed $LINENO +"${ZUPDATE}" -N -0 -q 'name with spaces.gz' || test_failed $LINENO [ ! 
-e 'name with spaces.gz' ] || test_failed $LINENO [ -e 'name with spaces.lz' ] || test_failed $LINENO "${ZCMP}" -N in 'name with spaces.lz' || test_failed $LINENO @@ -533,12 +573,34 @@ mkdir tmp2 mkdir tmp2/tmp3 cat in.bz2 > tmp2/tmp3/a.bz2 || framework_failure cat in.gz > tmp2/tmp3/a.gz || framework_failure -"${ZUPDATE}" -N -r --format=gz tmp2 || test_failed $LINENO +# test recursive to destdir +"${ZUPDATE}" -N -0 -k -r --format=gz --destdir=ddir1 tmp2 || test_failed $LINENO +[ -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e ddir1/tmp3/a.lz ] || test_failed $LINENO +"${ZUPDATE}" -N -0 -k -r --format=bz2 --destdir="${objdir}"/tmp/ddir2 tmp2 || + test_failed $LINENO +[ -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e ddir2/tmp3/a.lz ] || test_failed $LINENO +# test non-recursive to destdir +"${ZUPDATE}" -N -0 -k --destdir=ddir3 tmp2/tmp3/a.gz || test_failed $LINENO +[ -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e ddir3/a.lz ] || test_failed $LINENO +"${ZUPDATE}" -N -0 -k --destdir=ddir4/tmp2/tmp3 tmp2/tmp3/a.gz || + test_failed $LINENO +[ -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e ddir4/tmp2/tmp3/a.lz ] || test_failed $LINENO +rm -rf ddir1 ddir2 ddir3 ddir4 || framework_failure +# test recursive in place +"${ZUPDATE}" -N -0 -r --format=gz tmp2 || test_failed $LINENO [ -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO [ ! -e tmp2/tmp3/a.gz ] || test_failed $LINENO [ -e tmp2/tmp3/a.lz ] || test_failed $LINENO rm -f tmp2/tmp3/a.lz || framework_failure -"${ZUPDATE}" -N -r --format=bz2 tmp2 || test_failed $LINENO +"${ZUPDATE}" -N -0 -r --format=bz2 tmp2 || test_failed $LINENO [ ! -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO [ ! 
-e tmp2/tmp3/a.gz ] || test_failed $LINENO [ -e tmp2/tmp3/a.lz ] || test_failed $LINENO @@ -546,34 +608,33 @@ rm -f tmp2/tmp3/a.lz || framework_failure cat in.bz2 > tmp2/tmp3/a.bz2 || framework_failure cat in.gz > tmp2/tmp3/a.gz || framework_failure cd tmp2 || framework_failure -"${ZUPDATE}" -N -r -k -f . || test_failed $LINENO +"${ZUPDATE}" -N -0 -r -k -f . || test_failed $LINENO [ -e tmp3/a.bz2 ] || test_failed $LINENO [ -e tmp3/a.gz ] || test_failed $LINENO [ -e tmp3/a.lz ] || test_failed $LINENO rm -f tmp3/a.lz || framework_failure -"${ZUPDATE}" -N -r -k -f || test_failed $LINENO +"${ZUPDATE}" -N -0 -r -k -f || test_failed $LINENO [ -e tmp3/a.bz2 ] || test_failed $LINENO [ -e tmp3/a.gz ] || test_failed $LINENO [ -e tmp3/a.lz ] || test_failed $LINENO rm -f tmp3/a.lz || framework_failure -"${ZUPDATE}" -N -R -k -f . || test_failed $LINENO +"${ZUPDATE}" -N -0 -R -k -f . || test_failed $LINENO [ -e tmp3/a.bz2 ] || test_failed $LINENO [ -e tmp3/a.gz ] || test_failed $LINENO [ -e tmp3/a.lz ] || test_failed $LINENO rm -f tmp3/a.lz || framework_failure -"${ZUPDATE}" -N -R -k -f || test_failed $LINENO +"${ZUPDATE}" -N -0 -R -k -f || test_failed $LINENO [ -e tmp3/a.bz2 ] || test_failed $LINENO [ -e tmp3/a.gz ] || test_failed $LINENO [ -e tmp3/a.lz ] || test_failed $LINENO rm -f tmp3/a.lz || framework_failure -"${ZUPDATE}" -N -r -f . || test_failed $LINENO +"${ZUPDATE}" -N -0 -r -f . || test_failed $LINENO [ ! -e tmp3/a.bz2 ] || test_failed $LINENO [ ! -e tmp3/a.gz ] || test_failed $LINENO [ -e tmp3/a.lz ] || test_failed $LINENO cd .. || framework_failure rm -r tmp2 || framework_failure -rm -f empty empty.bz2 empty.gz empty.lz || framework_failure if ln -s '.' 
slink 2> /dev/null ; then "${ZCAT}" -N -r slink > /dev/null || test_failed $LINENO "${ZGREP}" -N -r "GNU" slink > /dev/null || test_failed $LINENO diff --git a/testsuite/zcat_vs.dat b/testsuite/zcat_vs.dat index 29978fd..42333e8 100644 --- a/testsuite/zcat_vs.dat +++ b/testsuite/zcat_vs.dat @@ -1,8 +1,8 @@ Worst case test file for zcat -vs. First 4096 input bytes produce 4095 output bytes because of -s. Next 4096 input bytes produce 16384 output bytes, accumulating a total -of 20479 bytes in the output buffer. ----------------------------------------------- +of 20479 bytes (5 * 4096 - 1) in the output buffer. +------------------------------- ............................................................... @@ -115,7 +115,7 @@ void show_help() " -M, --format=<list> process only the formats in <list>\n" " -n, --number number all output lines\n" " -N, --no-rcfile don't read runtime configuration file\n" - " -O, --force-format=<fmt> force the format given (bz2, gz, lz, xz, zst)\n" + " -O, --force-format=<fmt> force the input format\n" " -q, --quiet suppress all messages\n" " -r, --recursive operate recursively on directories\n" " -R, --dereference-recursive recursively follow symbolic links\n" @@ -128,7 +128,9 @@ void show_help() " --gz=<command> set compressor and options for gzip format\n" " --lz=<command> set compressor and options for lzip format\n" " --xz=<command> set compressor and options for xz format\n" - " --zst=<command> set compressor and options for zstd format\n" ); + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', 'zst',\n" + "and 'un' for uncompressed.\n" ); show_help_addr(); } @@ -235,10 +237,10 @@ bool cat( int infd, const int format_index, const std::string & input_filename, enum { buffer_size = 4096, outbuf_size = (5 * buffer_size) + 256 + 1 }; // input buffer with space for sentinel newline at the end uint8_t * const inbuf = new uint8_t[buffer_size+1]; - // 
output buffer with space for character quoting, 255-digit line number, - // worst case flushing respect to inbuf, and a canary byte. + /* output buffer with space for character quoting, 255-digit line number, + worst case flushing respect to inbuf, and a canary byte. */ uint8_t * const outbuf = new uint8_t[outbuf_size]; - outbuf[outbuf_size-1] = 0; + outbuf[outbuf_size-1] = 0; // canary byte; quoting does not print 0 Children children; bool error = false; @@ -258,7 +260,7 @@ bool cat( int infd, const int format_index, const std::string & input_filename, int main( const int argc, const char * const argv[] ) { enum { verbose_opt = 256, bz2_opt, gz_opt, lz_opt, xz_opt, zst_opt }; - int format_index = -1; + int format_index = -1; // undefined int recursive = 0; // 1 = '-r', 2 = '-R' std::list< std::string > filenames; Cat_options cat_options; @@ -338,11 +340,11 @@ int main( const int argc, const char * const argv[] ) case 'v': cat_options.show_nonprinting = true; break; case 'V': show_version(); return 0; case verbose_opt: if( verbosity < 4 ) ++verbosity; break; - case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break; - case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break; - case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break; - case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break; - case zst_opt: parse_compressor( arg, fmt_zst, 1 ); break; + case bz2_opt: parse_compressor( arg, pn, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, pn, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, pn, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, pn, fmt_xz, 1 ); break; + case zst_opt: parse_compressor( arg, pn, fmt_zst, 1 ); break; default : internal_error( "uncaught option." 
); } } // end process options @@ -40,7 +40,7 @@ #include "zutils.h" #ifndef LLONG_MAX -#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL +#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL #endif @@ -70,27 +70,32 @@ void show_help() " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" " -b, --print-bytes print differing bytes\n" + " -H, --hexadecimal print hexadecimal values instead of octal\n" " -i, --ignore-initial=<n>[:<n2>] ignore differences in the first <n> bytes\n" " -l, --list list position, value of all differing bytes\n" " -M, --format=<list> process only the formats in <list>\n" " -n, --bytes=<n> compare at most <n> bytes\n" " -N, --no-rcfile don't read runtime configuration file\n" - " -O, --force-format=[<f1>][,<f2>] force the formats given (bz2,gz,lz,xz,zst)\n" - " -q, --quiet suppress all messages\n" - " -s, --silent (same as --quiet)\n" - " -v, --verbose verbose mode (same as --list)\n" + " -O, --force-format=[<f1>][,<f2>] force one or both input formats\n" + " -q, --quiet, --silent suppress diagnostics written to stderr\n" + " -s, --script suppress messages about file differences\n" + " -v, --verbose verbose mode (opposite of --quiet)\n" " --bz2=<command> set compressor and options for bzip2 format\n" " --gz=<command> set compressor and options for gzip format\n" " --lz=<command> set compressor and options for lzip format\n" " --xz=<command> set compressor and options for xz format\n" " --zst=<command> set compressor and options for zstd format\n" - "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" - "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); + "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', 'zst',\n" + "and 'un' for uncompressed.\n" + "\nByte counts given as arguments to options may be expressed in decimal,\n" + "hexadecimal, or octal (using the same syntax as integer constants in C++),\n" + "and may be followed by a multiplier: k = kB = 10^3 = 
1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc.\n" ); show_help_addr(); } -// separate large numbers >= 100_000 in groups of 3 digits using '_' +// separate numbers of 5 or more digits in groups of 3 digits using '_' const char * format_num3( long long num ) { const char * const si_prefix = "kMGTPEZY"; @@ -103,20 +108,20 @@ const char * format_num3( long long num ) char * p = buf + bufsize - 1; // fill the buffer backwards *p = 0; // terminator const bool negative = num < 0; - if( negative ) num = -num; char prefix = 0; // try binary first, then si - for( int i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) + for( int i = 0; i < 8 && num != 0 && ( num / 1024 ) * 1024 == num; ++i ) { num /= 1024; prefix = binary_prefix[i]; } if( prefix ) *(--p) = 'i'; else - for( int i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) + for( int i = 0; i < 8 && num != 0 && ( num / 1000 ) * 1000 == num; ++i ) { num /= 1000; prefix = si_prefix[i]; } if( prefix ) *(--p) = prefix; - const bool split = num >= 100000; + const bool split = num >= 10000 || num <= -10000; for( int i = 0; ; ) { - *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; + long long onum = num; num /= 10; + *(--p) = llabs( onum - ( 10 * num ) ) + '0'; if( num == 0 ) break; if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } } if( negative ) *(--p) = '-'; @@ -133,12 +138,8 @@ long long getnum( const char * const arg, const char * const option_name, errno = 0; long long result = strtoll( arg, &tail, 0 ); if( tail == arg ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad or missing numerical argument in " - "option '%s'.\n", program_name, option_name ); - std::exit( 2 ); - } + { show_option_error( arg, "Bad or missing numerical argument in", + option_name ); std::exit( 2 ); } if( result < 0 ) errno = ERANGE; if( !errno && tail[0] && std::isalpha( tail[0] ) ) @@ -163,12 +164,8 @@ long long getnum( const char * const arg, const char * const option_name, case 'B': 
if( factor == 1000 && !bsuf ) exponent = 0; break; } if( exponent < 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad multiplier in numerical argument of " - "option '%s'.\n", program_name, option_name ); - std::exit( 2 ); - } + { show_option_error( arg, "Bad multiplier in numerical argument of", + option_name ); std::exit( 2 ); } for( int i = 0; i < exponent; ++i ) { if( ulimit / factor >= result ) result *= factor; @@ -179,8 +176,8 @@ long long getnum( const char * const arg, const char * const option_name, if( errno ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " - "in option '%s'.\n", program_name, format_num3( llimit ), + std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in " + "option '%s'.\n", program_name, arg, format_num3( llimit ), format_num3( ulimit ), option_name ); std::exit( 2 ); } @@ -197,13 +194,7 @@ void parse_ignore_initial( const char * const arg, const char * const pn, if( *tail == ':' || *tail == ',' ) ignore_initial[1] = getnum( ++tail, pn ); else if( *tail == 0 ) ignore_initial[1] = ignore_initial[0]; - else - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Bad separator in argument of option '%s'.\n", - program_name, pn ); - std::exit( 2 ); - } + else { show_option_error( arg, "Missing colon in", pn ); std::exit( 2 ); } } @@ -227,8 +218,8 @@ bool skip_ignore_initial( const long long ignore_initial, const int infd ) } -// Put into buf the unsigned char c, making unprintable bytes -// visible by quoting like cat -t does. +/* Put into buf the unsigned char c, making unprintable bytes visible by + quoting like cat -t does. 
*/ void sprintc( char * const buf, unsigned char c ) { int i = 0; @@ -251,7 +242,7 @@ int block_compare( const uint8_t * const buffer0, const uint8_t * p0 = buffer0; const uint8_t * p1 = buffer1; - if( verbosity == 0 ) + if( line_numberp ) { int nl_count = 0; while( *p0 == *p1 ) @@ -265,7 +256,8 @@ int block_compare( const uint8_t * const buffer0, int cmp( const long long max_size, const int infd[2], const std::string filenames[2], bool finished[2], - const bool print_bytes ) + const bool hexadecimal, const bool list, const bool print_bytes, + const bool scripted ) { const int buffer_size = 4096; unsigned long long byte_number = 1; @@ -277,7 +269,7 @@ int cmp( const long long max_size, const int infd[2], uint8_t * const buffer1 = buffer0 + buffer_size + 1; uint8_t * buffer[2]; buffer[0] = buffer0; buffer[1] = buffer1; - int different = 0; + int retval = 0; while( rest > 0 ) { @@ -289,7 +281,7 @@ int cmp( const long long max_size, const int infd[2], rd[i] = readblock( infd[i], buffer[i], size ); if( rd[i] != size && errno ) { show_file_error( filenames[i].c_str(), "Read error", errno ); - return 2; } + retval = 2; goto done; } } for( int i = 0; i < 2; ++i ) if( rd[i] < size ) finished[i] = true; @@ -298,13 +290,14 @@ int cmp( const long long max_size, const int infd[2], buffer0[min_rd] = 0; // sentinels for the block compare buffer1[min_rd] = 1; - int first_diff = block_compare( buffer0, buffer1, &line_number ); + int first_diff = block_compare( buffer0, buffer1, list ? 
0 : &line_number ); byte_number += first_diff; if( first_diff < min_rd ) { - if( verbosity < 0 ) return 1; // return status only - if( verbosity == 0 ) // show first difference + retval = 1; // difference found + if( scripted ) break; // status only + if( !list ) // show first difference { if( !print_bytes ) std::printf( "%s %s differ: byte %llu, line %llu\n", @@ -316,16 +309,17 @@ int cmp( const long long max_size, const int infd[2], const unsigned char c1 = buffer1[first_diff]; char buf0[5], buf1[5]; sprintc( buf0, c0 ); sprintc( buf1, c1 ); - std::printf( "%s %s differ: byte %llu, line %llu is %3o %s %3o %s\n", + std::printf( hexadecimal ? + "%s %s differ: byte %llu, line %llu is %02X %s %02X %s\n" : + "%s %s differ: byte %llu, line %llu is %3o %s %3o %s\n", filenames[0].c_str(), filenames[1].c_str(), byte_number, line_number, c0, buf0, c1, buf1 ); } std::fflush( stdout ); - return 1; + break; } - else // verbosity > 0 ; show all differences + else // list ; show all differences { - different = 1; for( ; first_diff < min_rd; ++byte_number, ++first_diff ) { const unsigned char c0 = buffer0[first_diff]; @@ -333,12 +327,14 @@ int cmp( const long long max_size, const int infd[2], if( c0 != c1 ) { if( !print_bytes ) - std::printf( "%llu %3o %3o\n", byte_number, c0, c1 ); + std::printf( hexadecimal ? "%llu %02X %02X\n" : "%llu %3o %3o\n", + byte_number, c0, c1 ); else { char buf0[5], buf1[5]; sprintc( buf0, c0 ); sprintc( buf1, c1 ); - std::printf( "%llu %3o %-4s %3o %s\n", + std::printf( hexadecimal ? "%llu %02X %-4s %02X %s\n" : + "%llu %3o %-4s %3o %s\n", byte_number, c0, buf0, c1, buf1 ); } } @@ -350,15 +346,18 @@ int cmp( const long long max_size, const int infd[2], if( rd[0] != rd[1] ) { if( verbosity >= 0 ) - std::fprintf( stderr, "%s: EOF on %s\n", - program_name, filenames[rd[1]<rd[0]].c_str() ); - return 1; + std::fprintf( stderr, list ? 
+ "%s: EOF on %s after byte %llu\n" : + "%s: EOF on %s after byte %llu, in line %llu\n", + program_name, filenames[rd[1]<rd[0]].c_str(), + byte_number - 1, line_number ); + retval = 1; break; } if( min_rd != buffer_size ) break; } - +done: delete[] buffer0; - return different; + return retval; } } // end namespace @@ -370,8 +369,11 @@ int main( const int argc, const char * const argv[] ) // number of initial bytes ignored for each file long long ignore_initial[2] = { 0, 0 }; long long max_size = -1; // < 0 means unlimited size - int format_types[2] = { -1, -1 }; - bool print_bytes = false; + int format_types[2] = { -1, -1 }; // < 0 means undefined + bool hexadecimal = false; + bool list = false; // list position, value of all differing bytes + bool print_bytes = false; // print differing bytes + bool scripted = false; // suppress messages about file differences program_name = "zcmp"; invocation_name = ( argc > 0 ) ? argv[0] : program_name; @@ -379,6 +381,7 @@ int main( const int argc, const char * const argv[] ) { { 'b', "print-bytes", Arg_parser::no }, { 'h', "help", Arg_parser::no }, + { 'H', "hexadecimal", Arg_parser::no }, { 'i', "ignore-initial", Arg_parser::yes }, { 'l', "list", Arg_parser::no }, { 'M', "format", Arg_parser::yes }, @@ -386,7 +389,8 @@ int main( const int argc, const char * const argv[] ) { 'N', "no-rcfile", Arg_parser::no }, { 'O', "force-format", Arg_parser::yes }, { 'q', "quiet", Arg_parser::no }, - { 's', "silent", Arg_parser::no }, + { 'q', "silent", Arg_parser::no }, + { 's', "script", Arg_parser::no }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, { bz2_opt, "bz2", Arg_parser::yes }, @@ -414,21 +418,22 @@ int main( const int argc, const char * const argv[] ) { case 'b': print_bytes = true; break; case 'h': show_help(); return 0; + case 'H': hexadecimal = true; break; case 'i': parse_ignore_initial( arg, pn, ignore_initial ); break; - case 'l': verbosity = 1; break; + case 'l': list = true; break; case 'M': 
parse_format_list( sarg, pn ); break; case 'n': max_size = getnum( arg, pn ); break; case 'N': break; case 'O': parse_format_types2( sarg, pn, format_types ); break; - case 'q': - case 's': verbosity = -1; break; - case 'v': verbosity = 1; break; + case 'q': verbosity = -1; break; + case 's': scripted = true; break; + case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; - case bz2_opt: parse_compressor( sarg, fmt_bz2 ); break; - case gz_opt: parse_compressor( sarg, fmt_gz ); break; - case lz_opt: parse_compressor( sarg, fmt_lz ); break; - case xz_opt: parse_compressor( sarg, fmt_xz ); break; - case zst_opt: parse_compressor( sarg, fmt_zst ); break; + case bz2_opt: parse_compressor( sarg, pn, fmt_bz2 ); break; + case gz_opt: parse_compressor( sarg, pn, fmt_gz ); break; + case lz_opt: parse_compressor( sarg, pn, fmt_lz ); break; + case xz_opt: parse_compressor( sarg, pn, fmt_xz ); break; + case zst_opt: parse_compressor( sarg, pn, fmt_zst ); break; default : internal_error( "uncaught option." 
); } } // end process options @@ -438,12 +443,10 @@ int main( const int argc, const char * const argv[] ) setmode( STDOUT_FILENO, O_BINARY ); #endif - if( argind >= parser.arguments() ) - { show_error( "No files given.", 0, true ); return 2; } - if( parser.arguments() - argind > 2 ) - { show_error( "Too many files.", 0, true ); return 2; } - const int files = parser.arguments() - argind; + if( files < 1 ) { show_error( "No files given.", 0, true ); return 2; } + if( files > 2 ) { show_error( "Too many files.", 0, true ); return 2; } + std::string filenames[2]; // file names of the two input files filenames[0] = parser.argument( argind ); if( files == 2 ) filenames[1] = parser.argument( argind + 1 ); @@ -498,7 +501,8 @@ int main( const int argc, const char * const argv[] ) } bool finished[2] = { false, false }; - int retval = cmp( max_size, infd, filenames, finished, print_bytes ); + int retval = cmp( max_size, infd, filenames, finished, hexadecimal, list, + print_bytes, scripted ); for( int i = 0; i < 2; ++i ) if( !good_status( children[i], finished[i] ) ) retval = 2; diff --git a/zcmpdiff.cc b/zcmpdiff.cc index 5688ee2..31b3f86 100644 --- a/zcmpdiff.cc +++ b/zcmpdiff.cc @@ -54,11 +54,13 @@ void parse_format_types2( const std::string & arg, const char * const pn, int format_types[2] ) { const unsigned i = std::min( arg.find( ',' ), arg.size() ); - if( i > 0 ) format_types[0] = parse_format_type( arg.substr( 0, i ), pn ); - else format_types[0] = -1; - if( i + 1 < arg.size() ) format_types[1] = - parse_format_type( arg.substr( i + 1 ), pn ); - else format_types[1] = -1; + if( i != std::min( arg.rfind( ',' ), arg.size() ) ) + { show_option_error( arg.c_str(), "Too many formats in", pn ); + std::exit( 1 ); } + format_types[0] = + ( i > 0 ) ? parse_format_type( arg.substr( 0, i ), pn ) : -1; + format_types[1] = + ( i + 1 < arg.size() ) ? 
parse_format_type( arg.substr( i + 1 ), pn ) : -1; } @@ -79,7 +79,7 @@ void show_help() " -i, --ignore-case ignore case differences in file contents\n" " -M, --format=<list> process only the formats in <list>\n" " -N, --no-rcfile don't read runtime configuration file\n" - " -O, --force-format=[<f1>][,<f2>] force the formats given (bz2,gz,lz,xz,zst)\n" + " -O, --force-format=[<f1>][,<f2>] force one or both input formats\n" " -p, --show-c-function show which C function each change is in\n" " -q, --brief output only whether files differ\n" " -s, --report-identical-files report when two files are identical\n" @@ -95,7 +95,9 @@ void show_help() " --gz=<command> set compressor and options for gzip format\n" " --lz=<command> set compressor and options for lzip format\n" " --xz=<command> set compressor and options for xz format\n" - " --zst=<command> set compressor and options for zstd format\n" ); + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', 'zst',\n" + "and 'un' for uncompressed.\n" ); show_help_addr(); } @@ -128,7 +130,7 @@ bool set_fifonames( const std::string filenames[2] ) if( p ) { fifonames[0] = p; fifonames[0] += '/'; } else fifonames[0] = "/tmp/"; - int n = getpid(); + unsigned n = getpid(); do fifonames[0] += codes[n % num_codes]; while( n /= num_codes ); const unsigned pos = fifonames[0].size(); fifonames[0] += '_'; @@ -186,12 +188,8 @@ bool set_data_feeder( const std::string & filename, { const int outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY ); if( outfd < 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't open FIFO '%s' for writing: %s\n", - program_name, fifoname.c_str(), std::strerror( errno ) ); - _exit( 2 ); - } + { show_file_error( fifoname.c_str(), "Can't open FIFO for writing", + errno ); _exit( 2 ); } if( dup2( fda[0], STDIN_FILENO ) >= 0 && dup2( outfd, STDOUT_FILENO ) >= 0 && close( fda[0] ) == 0 && close( fda[1] ) == 0 && @@ -225,12 
+223,8 @@ bool set_data_feeder( const std::string & filename, { const int outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY ); if( outfd < 0 ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't open FIFO '%s' for writing: %s\n", - program_name, fifoname.c_str(), std::strerror( errno ) ); - _exit( 2 ); - } + { show_file_error( fifoname.c_str(), "Can't open FIFO for writing", + errno ); _exit( 2 ); } if( !feed_data( filename, infd, outfd, magic_data, magic_size ) ) _exit( 2 ); if( close( outfd ) != 0 ) @@ -268,7 +262,7 @@ int main( const int argc, const char * const argv[] ) { enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; std::vector< const char * > diff_args; // args to diff, maybe empty - int format_types[2] = { -1, -1 }; + int format_types[2] = { -1, -1 }; // < 0 means undefined program_name = "zdiff"; invocation_name = ( argc > 0 ) ? argv[0] : program_name; @@ -325,8 +319,7 @@ int main( const int argc, const char * const argv[] ) case 'b': diff_args.push_back( "-b" ); break; case 'B': diff_args.push_back( "-B" ); break; case 'c': diff_args.push_back( "-c" ); break; - case 'C': diff_args.push_back( "-C" ); - diff_args.push_back( arg ); break; + case 'C': diff_args.push_back( "-C" ); diff_args.push_back( arg ); break; case 'd': diff_args.push_back( "-d" ); break; case 'E': diff_args.push_back( "-E" ); break; case 'h': show_help(); return 0; @@ -340,19 +333,17 @@ int main( const int argc, const char * const argv[] ) case 't': diff_args.push_back( "-t" ); break; case 'T': diff_args.push_back( "-T" ); break; case 'u': diff_args.push_back( "-u" ); break; - case 'U': diff_args.push_back( "-U" ); - diff_args.push_back( arg ); break; - case 'v': verbosity = 1; break; + case 'U': diff_args.push_back( "-U" ); diff_args.push_back( arg ); break; + case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version( DIFF " --version" ); return 0; case 'w': diff_args.push_back( "-w" ); break; - case 'W': diff_args.push_back( "-W" ); - 
diff_args.push_back( arg ); break; + case 'W': diff_args.push_back( "-W" ); diff_args.push_back( arg ); break; case 'y': diff_args.push_back( "-y" ); break; - case bz2_opt: parse_compressor( sarg, fmt_bz2 ); break; - case gz_opt: parse_compressor( sarg, fmt_gz ); break; - case lz_opt: parse_compressor( sarg, fmt_lz ); break; - case xz_opt: parse_compressor( sarg, fmt_xz ); break; - case zst_opt: parse_compressor( sarg, fmt_zst ); break; + case bz2_opt: parse_compressor( sarg, pn, fmt_bz2 ); break; + case gz_opt: parse_compressor( sarg, pn, fmt_gz ); break; + case lz_opt: parse_compressor( sarg, pn, fmt_lz ); break; + case xz_opt: parse_compressor( sarg, pn, fmt_xz ); break; + case zst_opt: parse_compressor( sarg, pn, fmt_zst ); break; default : internal_error( "uncaught option." ); } } // end process options @@ -362,12 +353,10 @@ int main( const int argc, const char * const argv[] ) setmode( STDOUT_FILENO, O_BINARY ); #endif - if( argind >= parser.arguments() ) - { show_error( "No files given.", 0, true ); return 2; } - if( parser.arguments() - argind > 2 ) - { show_error( "Too many files.", 0, true ); return 2; } - const int files = parser.arguments() - argind; + if( files < 1 ) { show_error( "No files given.", 0, true ); return 2; } + if( files > 2 ) { show_error( "Too many files.", 0, true ); return 2; } + std::string filenames[2]; // file names of the two input files filenames[0] = parser.argument( argind ); if( files == 2 ) filenames[1] = parser.argument( argind + 1 ); @@ -93,7 +93,7 @@ void show_help() " -n, --line-number print the line number of each line\n" " -N, --no-rcfile don't read runtime configuration file\n" " -o, --only-matching show only the part of a line matching <pattern>\n" - " -O, --force-format=<fmt> force the format given (bz2, gz, lz, xz, zst)\n" + " -O, --force-format=<fmt> force the input format\n" " -P, --perl-regexp <pattern> is a Perl regular expression\n" " -q, --quiet, --silent suppress all messages\n" " -r, --recursive operate 
recursively on directories\n" @@ -111,6 +111,8 @@ void show_help() " --lz=<command> set compressor and options for lzip format\n" " --xz=<command> set compressor and options for xz format\n" " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', 'zst',\n" + "and 'un' for uncompressed.\n" "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); show_help_addr(); @@ -210,7 +212,7 @@ int main( const int argc, const char * const argv[] ) { enum { help_opt = 256, verbose_opt, color_opt, label_opt, linebuf_opt, bz2_opt, gz_opt, lz_opt, xz_opt, zst_opt }; - int format_index = -1; + int format_index = -1; // undefined int list_mode = 0; // 1 = list matches, -1 = list non-matches int recursive = 0; // 1 = '-r', 2 = '-R' int show_name = -1; // tri-state bool @@ -222,7 +224,7 @@ int main( const int argc, const char * const argv[] ) std::vector< const char * > grep_args; // args to grep, maybe empty std::string color_option; // additional args to grep std::string label_option; - std::string label = "(standard input)"; // prefix for standard input + const char * label = "(standard input)"; // prefix for standard input program_name = "zgrep"; invocation_name = ( argc > 0 ) ? 
argv[0] : program_name; @@ -294,19 +296,16 @@ int main( const int argc, const char * const argv[] ) switch( code ) { case 'a': grep_args.push_back( "-a" ); break; - case 'A': grep_args.push_back( "-A" ); - grep_args.push_back( arg ); break; + case 'A': grep_args.push_back( "-A" ); grep_args.push_back( arg ); break; case 'b': grep_args.push_back( "-b" ); break; - case 'B': grep_args.push_back( "-B" ); - grep_args.push_back( arg ); break; + case 'B': grep_args.push_back( "-B" ); grep_args.push_back( arg ); break; case 'c': grep_args.push_back( "-c" ); break; - case 'C': grep_args.push_back( "-C" ); - grep_args.push_back( arg ); break; - case 'e': grep_args.push_back( "-e" ); - grep_args.push_back( arg ); pattern_found = true; break; + case 'C': grep_args.push_back( "-C" ); grep_args.push_back( arg ); break; + case 'e': grep_args.push_back( "-e" ); grep_args.push_back( arg ); + pattern_found = true; break; case 'E': grep_args.push_back( "-E" ); break; - case 'f': grep_args.push_back( "-f" ); - grep_args.push_back( arg ); pattern_found = true; break; + case 'f': grep_args.push_back( "-f" ); grep_args.push_back( arg ); + pattern_found = true; break; case 'F': grep_args.push_back( "-F" ); break; case 'G': grep_args.push_back( "-G" ); break; case 'h': show_name = false; break; @@ -315,8 +314,7 @@ int main( const int argc, const char * const argv[] ) case 'I': grep_args.push_back( "-I" ); break; case 'l': grep_args.push_back( "-l" ); list_mode = 1; break; case 'L': grep_args.push_back( "-L" ); list_mode = -1; break; - case 'm': grep_args.push_back( "-m" ); - grep_args.push_back( arg ); break; + case 'm': grep_args.push_back( "-m" ); grep_args.push_back( arg ); break; case 'M': parse_format_list( sarg, pn ); break; case 'n': grep_args.push_back( "-n" ); break; case 'N': break; @@ -340,14 +338,14 @@ int main( const int argc, const char * const argv[] ) case color_opt: color_option = "--color"; if( !sarg.empty() ) { color_option += '='; color_option += sarg; } break; - case 
label_opt: label_option = label = sarg; break; + case label_opt: label_option = sarg; label = arg; break; case linebuf_opt: grep_args.push_back( "--line-buffered" ); line_buffered = true; break; - case bz2_opt: parse_compressor( sarg, fmt_bz2 ); break; - case gz_opt: parse_compressor( sarg, fmt_gz ); break; - case lz_opt: parse_compressor( sarg, fmt_lz ); break; - case xz_opt: parse_compressor( sarg, fmt_xz ); break; - case zst_opt: parse_compressor( sarg, fmt_zst ); break; + case bz2_opt: parse_compressor( sarg, pn, fmt_bz2 ); break; + case gz_opt: parse_compressor( sarg, pn, fmt_gz ); break; + case lz_opt: parse_compressor( sarg, pn, fmt_lz ); break; + case xz_opt: parse_compressor( sarg, pn, fmt_xz ); break; + case zst_opt: parse_compressor( sarg, pn, fmt_zst ); break; default : internal_error( "uncaught option." ); } } // end process options @@ -50,14 +50,16 @@ namespace { void show_help() { - std::printf( "ztest verifies the integrity of the compressed files specified.\n" - "Uncompressed files are ignored. If a file is specified as '-', the\n" - "integrity of compressed data read from standard input is verified. Data\n" - "read from standard input must be all in the same compressed format. If\n" - "a file fails to decompress, does not exist, can't be opened, or is a\n" - "terminal, ztest continues verifying the rest of the files. A final\n" - "diagnostic is shown at verbosity level 1 or higher if any file fails the\n" - "test when testing multiple files.\n" + std::printf( "ztest verifies the integrity of the compressed files specified. It\n" + "also warns if an uncompressed file has a compressed file name extension, or\n" + "if a compressed file has a wrong compressed extension. Uncompressed files\n" + "are otherwise ignored. If a file is specified as '-', the integrity of\n" + "compressed data read from standard input is verified. Data read from\n" + "standard input must be all in the same compressed format. 
If a file fails to\n" + "decompress, does not exist, can't be opened, or is a terminal, ztest\n" + "continues verifying the rest of the files. A final diagnostic is shown at\n" + "verbosity level 1 or higher if any file fails the test when testing multiple\n" + "files.\n" "\nIf no files are specified, recursive searches examine the current\n" "working directory, and nonrecursive searches read standard input.\n" "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" @@ -69,14 +71,15 @@ void show_help() "always be verified as reliably as files in the other formats can.\n" "\nUsage: ztest [options] [files]\n" "\nExit status is 0 if all compressed files verify OK, 1 if environmental\n" - "problems (file not found, invalid flags, I/O errors, etc), 2 if any\n" - "compressed file is corrupt or invalid.\n" + "problems (file not found, invalid command line options, I/O errors, etc),\n" + "2 if any compressed file is corrupt or invalid, or if any file has an\n" + "incorrect file name extension.\n" "\nOptions:\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" " -M, --format=<list> process only the formats in <list>\n" " -N, --no-rcfile don't read runtime configuration file\n" - " -O, --force-format=<fmt> force the format given (bz2, gz, lz, xz, zst)\n" + " -O, --force-format=<fmt> force the input format\n" " -q, --quiet suppress all messages\n" " -r, --recursive operate recursively on directories\n" " -R, --dereference-recursive recursively follow symbolic links\n" @@ -85,7 +88,8 @@ void show_help() " --gz=<command> set compressor and options for gzip format\n" " --lz=<command> set compressor and options for lzip format\n" " --xz=<command> set compressor and options for xz format\n" - " --zst=<command> set compressor and options for zstd format\n" ); + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', and 'zst'.\n" ); 
show_help_addr(); } @@ -117,10 +121,8 @@ int ztest_stdin( const int infd, int format_index, if( pid == 0 ) // child1 (compressor feeder) { if( close( fda[0] ) != 0 || - !feed_data( "", infd, fda[1], magic_data, magic_size ) ) - _exit( 1 ); - if( close( fda[1] ) != 0 ) - { show_close_error(); _exit( 1 ); } + !feed_data( "-", infd, fda[1], magic_data, magic_size ) ) _exit( 1 ); + if( close( fda[1] ) != 0 ) { show_close_error(); _exit( 1 ); } _exit( 0 ); } if( pid < 0 ) // parent @@ -170,11 +172,17 @@ int ztest_file( const int infd, int format_index, static int disable_zst = -1; // tri-state bool uint8_t magic_data[magic_buf_size]; int magic_size = 0; + const int format_index_e = test_extension( input_filename ); if( format_index < 0 ) format_index = test_format( infd, magic_data, &magic_size ); const char * const compressor_name = get_compressor_name( format_index ); if( !compressor_name ) + { + if( format_index < 0 && format_index_e >= 0 ) + { show_file_error( input_filename.c_str(), + "Uncompressed file has compressed extension." ); return 2; } return 0; // ignore this file + } if( format_index == fmt_xz ) { if( disable_xz < 0 ) @@ -226,7 +234,12 @@ int ztest_file( const int infd, int format_index, { show_fork_error( compressor_name ); return 1; } const bool isgzxz = ( format_index == fmt_gz || format_index == fmt_xz ); - return wait_for_child( pid, compressor_name, 1, isgzxz ); + int retval = wait_for_child( pid, compressor_name, 1, isgzxz ); + if( retval == 0 && format_index >= 0 && format_index_e >= 0 && + format_index != format_index_e ) + { show_file_error( input_filename.c_str(), + "Compressed file has wrong compressed extension." 
); retval = 2; } + return retval; } } // end namespace @@ -235,7 +248,7 @@ int ztest_file( const int infd, int format_index, int main( const int argc, const char * const argv[] ) { enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; - int format_index = -1; + int format_index = -1; // undefined int recursive = 0; // 1 = '-r', 2 = '-R' std::list< std::string > filenames; std::vector< const char * > ztest_args; // args to ztest, maybe empty @@ -278,18 +291,18 @@ int main( const int argc, const char * const argv[] ) case 'h': show_help(); return 0; case 'M': parse_format_list( arg, pn ); break; case 'N': break; - case 'O': format_index = parse_format_type( arg, pn ); break; + case 'O': format_index = parse_format_type( arg, pn, false ); break; case 'q': verbosity = -1; ztest_args.push_back( "-q" ); break; case 'r': recursive = 1; break; case 'R': recursive = 2; break; case 'v': if( verbosity < 4 ) ++verbosity; ztest_args.push_back( "-v" ); break; case 'V': show_version(); return 0; - case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break; - case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break; - case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break; - case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break; - case zst_opt: parse_compressor( arg, fmt_zst, 1 ); break; + case bz2_opt: parse_compressor( arg, pn, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, pn, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, pn, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, pn, fmt_xz, 1 ); break; + case zst_opt: parse_compressor( arg, pn, fmt_zst, 1 ); break; default : internal_error( "uncaught option." ); } } // end process options @@ -325,7 +338,7 @@ int main( const int argc, const char * const argv[] ) if( isatty( infd ) ) // for example /dev/tty { - show_file_error( input_filename == "-" ? "(stdin)" : input_filename.c_str(), + show_file_error( name_or_stdin( input_filename.c_str() ), "I won't read compressed data from a terminal." 
); close( infd ); error = true; continue; } @@ -73,11 +73,12 @@ void show_help() "\nExit status is 0 if all the compressed files were successfully recompressed\n" "(if needed), compared, and deleted (if requested). 1 if a non-fatal error\n" "occurred (file not found or not regular, or has invalid format, or can't be\n" - "deleted). 2 if a fatal error occurred (compressor can't be run, or\n" - "comparison fails).\n" + "deleted). 2 if a fatal error occurred (invalid command line options,\n" + "compressor can't be run, or comparison fails).\n" "\nOptions:\n" " -h, --help display this help and exit\n" " -V, --version output version information and exit\n" + " -d, --destdir=<dir> write recompressed files into <dir>\n" " -e, --expand-extensions expand combined extensions; tgz -> tar.lz\n" " -f, --force don't skip a file even if the .lz exists\n" " -i, --ignore-errors ignore non-fatal errors\n" @@ -94,11 +95,60 @@ void show_help() " --gz=<command> set compressor and options for gzip format\n" " --lz=<command> set compressor and options for lzip format\n" " --xz=<command> set compressor and options for xz format\n" - " --zst=<command> set compressor and options for zstd format\n" ); + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for option '-M' are 'bz2', 'gz', 'lz', 'xz', and 'zst'.\n" ); show_help_addr(); } +void extract_srcdir_name( const std::string & name, std::string & srcdir ) + { + if( name.empty() || name == "." 
) return; // leave srcdir empty + if( name[name.size()-1] == '/' ) // remove last slash + { srcdir.assign( name, 0, name.size() - 1 ); return; } + struct stat st; + if( stat( name.c_str(), &st ) == 0 && S_ISDIR( st.st_mode ) ) + { srcdir = name; return; } + + unsigned size = 0; // size of srcdir without last slash nor basename + for( unsigned i = name.size(); i > 0; --i ) + if( name[i-1] == '/' ) { size = i - 1; break; } + if( size > 0 ) srcdir.assign( name, 0, size ); + } + + +bool make_dirs( const std::string & name ) + { + static std::string cached_dirname; + unsigned dirsize = name.size(); // size of dirname without last slash + + for( unsigned i = name.size(); i > 0; --i ) + if( name[i-1] == '/' ) { dirsize = i - 1; break; } + if( dirsize >= name.size() ) return true; // no dirname + if( dirsize == 0 ) return true; // dirname is '/' + if( cached_dirname.size() == dirsize && + cached_dirname.compare( 0, dirsize, name ) == 0 ) return true; + + for( unsigned i = 0; i < dirsize; ) + { + while( i < dirsize && name[i] == '/' ) ++i; + const unsigned first = i; + while( i < dirsize && name[i] != '/' ) ++i; + if( first < i ) + { + std::string partial( name, 0, i ); + struct stat st; + if( stat( partial.c_str(), &st ) == 0 ) + { if( !S_ISDIR( st.st_mode ) ) return false; } + else if( mkdir( partial.c_str(), S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | + S_IXOTH ) != 0 && errno != EEXIST ) return false; + } + } + cached_dirname.assign( name, 0, dirsize ); + return true; + } + + void cant_execute( const std::string & command, const int status ) { if( verbosity >= 0 ) @@ -137,13 +187,14 @@ void set_permissions( const char * const rname, const struct stat & in_stats ) // Return value: 0 = success, -1 = file skipped, 1 = error, 2 = fatal error. 
int zupdate_file( const std::string & name, const char * const lzip_name, const std::vector< std::string > & lzip_args2, + const std::string & srcdir, const std::string & destdir, const bool expand, const bool force, const bool keep_input_files, const bool no_rcfile ) { // bzip2, gzip, and lzip are the primary formats. xz and zstd are optional. static int disable_xz = -1; // tri-state bool static int disable_zst = -1; // tri-state bool - int format_index = -1; + int format_index = -1; // undefined std::string rname; // recompressed name const int eindex = extension_index( name ); // search extension @@ -157,7 +208,18 @@ int zupdate_file( const std::string & name, const char * const lzip_name, program_name, name.c_str(), extension_from( eindex ) ); return 0; // ignore this file } - rname.assign( name, 0, name.size() - std::strlen( extension_from( eindex ) ) ); + if( destdir.size() ) + { + if( srcdir.size() && name.compare( 0, srcdir.size(), srcdir ) != 0 ) + internal_error( "srcdir mismatch." ); + rname = destdir; + if( rname[rname.size()-1] != '/' && name[srcdir.size()] != '/' ) + rname += '/'; + rname.append( name, srcdir.size(), name.size() - srcdir.size() - + std::strlen( extension_from( eindex ) ) ); + } + else + rname.assign( name, 0, name.size() - std::strlen( extension_from( eindex ) ) ); rname += ( std::strcmp( extension_to( eindex ), ".tar" ) == 0 ) ? ( expand ? 
".tar.lz" : ".tlz" ) : ".lz"; } @@ -172,19 +234,11 @@ int zupdate_file( const std::string & name, const char * const lzip_name, struct stat in_stats; if( stat( name.c_str(), &in_stats ) != 0 ) // check input file - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't stat input file '%s': %s\n", - program_name, name.c_str(), std::strerror( errno ) ); - return 1; - } + { show_file_error( name.c_str(), "Can't stat input file", errno ); + return 1; } if( !S_ISREG( in_stats.st_mode ) ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Input file '%s' is not a regular file.\n", - program_name, name.c_str() ); - return 1; - } + { show_file_error( name.c_str(), "Input file is not a regular file." ); + return 1; } struct stat st; // not used const std::string rname2( rname + ".lz" ); // produced by lzip < 1.20 @@ -206,8 +260,8 @@ int zupdate_file( const std::string & name, const char * const lzip_name, std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; disable_xz = ( std::system( command.c_str() ) != 0 ); if( disable_xz && verbosity >= 2 ) - std::fprintf( stderr, "%s: '%s' not found. Ignoring xz files.\n", - program_name, compressor_name ); + show_file_error( compressor_name, + "Xz decompressor not found. Ignoring xz files." ); } if( disable_xz ) return 0; // ignore this file if no xz installed } @@ -218,8 +272,8 @@ int zupdate_file( const std::string & name, const char * const lzip_name, std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; disable_zst = ( std::system( command.c_str() ) != 0 ); if( disable_zst && verbosity >= 2 ) - std::fprintf( stderr, "%s: '%s' not found. Ignoring zstd files.\n", - program_name, compressor_name ); + show_file_error( compressor_name, + "Zstd decompressor not found. Ignoring zstd files." 
); } if( disable_zst ) return 0; // ignore this file if no zstd installed } @@ -228,6 +282,9 @@ int zupdate_file( const std::string & name, const char * const lzip_name, { if( verbosity >= 1 ) std::fprintf( stderr, "Recompressing file '%s'\n", name.c_str() ); + if( destdir.size() && !make_dirs( rname ) ) + { show_file_error( rname.c_str(), "Error creating intermediate directory." ); + return 2; } int fda[2]; // pipe between decompressor and compressor if( pipe( fda ) < 0 ) { show_error( "Can't create pipe", errno ); return 2; } @@ -264,8 +321,8 @@ int zupdate_file( const std::string & name, const char * const lzip_name, { const std::vector< std::string > & lzip_args = get_compressor_args( fmt_lz ); - const int size = lzip_args.size(); - const int size2 = lzip_args2.size(); + const int size = lzip_args.size(); // from .conf or --lz + const int size2 = lzip_args2.size(); // from command line const char ** const argv = new const char *[size+size2+5]; argv[0] = lzip_name; argv[1] = "-9"; @@ -299,13 +356,14 @@ int zupdate_file( const std::string & name, const char * const lzip_name, { if( lz_exists && verbosity >= 1 ) std::fprintf( stderr, "Comparing file '%s'\n", name.c_str() ); + // Quote names in zcmp_command to allow file/dir names with spaces. 
std::string zcmp_command( invocation_name ); unsigned i = zcmp_command.size(); while( i > 0 && zcmp_command[i-1] != '/' ) --i; // strip "zupdate" zcmp_command.resize( i ); zcmp_command.insert( zcmp_command.begin(), '\'' ); zcmp_command += "zcmp' "; // '[dir/]zcmp' if( no_rcfile ) zcmp_command += "-N "; - if( verbosity < 0 ) zcmp_command += "-q "; + if( verbosity < 0 ) zcmp_command += "-q -s "; zcmp_command += '\''; zcmp_command += name; zcmp_command += "' '"; zcmp_command += rname; zcmp_command += '\''; int status = std::system( zcmp_command.c_str() ); @@ -315,12 +373,8 @@ int zupdate_file( const std::string & name, const char * const lzip_name, } if( !keep_input_files && std::remove( name.c_str() ) != 0 && errno != ENOENT ) - { - if( verbosity >= 0 ) - std::fprintf( stderr, "%s: Can't delete input file '%s': %s\n", - program_name, name.c_str(), std::strerror( errno ) ); - return 1; - } + { show_file_error( name.c_str(), "Can't delete input file", errno ); + return 1; } return 0; } @@ -331,7 +385,7 @@ int main( const int argc, const char * const argv[] ) { enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; int recursive = 0; // 1 = '-r', 2 = '-R' - std::list< std::string > filenames; + std::string destdir; // write recompressed files here std::vector< std::string > lzip_args2; // args to lzip, maybe empty bool expand = false; bool force = false; @@ -353,6 +407,7 @@ int main( const int argc, const char * const argv[] ) { '7', 0, Arg_parser::no }, { '8', 0, Arg_parser::no }, { '9', 0, Arg_parser::no }, + { 'd', "destdir", Arg_parser::yes }, { 'e', "expand-extensions", Arg_parser::no }, { 'f', "force", Arg_parser::no }, { 'h', "help", Arg_parser::no }, @@ -391,6 +446,7 @@ int main( const int argc, const char * const argv[] ) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': lzip_args2.push_back( "-" ); lzip_args2.back() += code; break; + case 'd': destdir = arg; break; case 'e': expand = true; break; case 'f': 
force = true; break; case 'h': show_help(); return 0; @@ -404,11 +460,11 @@ int main( const int argc, const char * const argv[] ) case 'R': recursive = 2; break; case 'v': if( verbosity < 4 ) ++verbosity; break; case 'V': show_version(); return 0; - case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break; - case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break; - case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break; - case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break; - case zst_opt: parse_compressor( arg, fmt_zst, 1 ); break; + case bz2_opt: parse_compressor( arg, pn, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, pn, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, pn, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, pn, fmt_xz, 1 ); break; + case zst_opt: parse_compressor( arg, pn, fmt_zst, 1 ); break; default : internal_error( "uncaught option." ); } } // end process options @@ -422,22 +478,31 @@ int main( const int argc, const char * const argv[] ) if( !lzip_name ) { show_error( "Missing name of compressor for lzip format." ); return 2; } - for( ; argind < parser.arguments(); ++argind ) - filenames.push_back( parser.argument( argind ) ); - - if( filenames.empty() && recursive ) filenames.push_back( "." ); + std::list< std::string > filenames; + if( argind < parser.arguments() ) + filenames.push_back( parser.argument( argind++ ) ); // first argument + else if( recursive ) filenames.push_back( "." 
); + else return 0; // nothing to do std::string input_filename; int retval = 0; bool error = false; - while( next_filename( filenames, input_filename, error, recursive, true ) ) + while( true ) { - int tmp = zupdate_file( input_filename, lzip_name, lzip_args2, expand, - force, keep_input_files, no_rcfile ); - if( tmp < 0 ) error = true; - if( tmp > retval ) retval = tmp; - if( tmp >= 2 || ( tmp == 1 && !ignore_errors ) ) break; + std::string srcdir; // dirname to be replaced by destdir + if( destdir.size() ) extract_srcdir_name( filenames.front(), srcdir ); + while( next_filename( filenames, input_filename, error, recursive, true ) ) + { + int tmp = zupdate_file( input_filename, lzip_name, lzip_args2, srcdir, + destdir, expand, force, keep_input_files, no_rcfile ); + if( tmp < 0 ) error = true; // file skipped + if( tmp > retval ) retval = tmp; + if( tmp >= 2 || ( tmp == 1 && !ignore_errors ) ) goto out; + } + if( argind >= parser.arguments() ) break; + filenames.push_back( parser.argument( argind++ ) ); } +out: if( error && retval == 0 ) retval = 1; return retval; } @@ -110,7 +110,7 @@ int writeblock( const int fd, const uint8_t * const buf, const int size ) } -// Empty filename means stdin. +// filename == "-" means stdin. // bool feed_data( const std::string & filename, const int infd, const int outfd, const uint8_t * magic_data, const int magic_size ) @@ -123,8 +123,8 @@ bool feed_data( const std::string & filename, const int infd, const int outfd, { const int size = readblock( infd, buffer, buffer_size ); if( size != buffer_size && errno ) - { const char * const name = filename.empty() ? 
"-" : filename.c_str(); - show_file_error( name, "Read error", errno ); return false; } + { show_file_error( name_or_stdin( filename.c_str() ), "Read error", + errno ); return false; } if( size > 0 && writeblock( outfd, buffer, size ) != size ) { show_error( "Write error", errno ); return false; } if( size < buffer_size ) break; @@ -183,8 +183,7 @@ bool set_data_feeder( const std::string & filename, int * const infdp, close( fda2[0] ) != 0 || close( fda2[1] ) != 0 || !feed_data( filename, old_infd, fda[1], magic_data, magic_size ) ) _exit( 2 ); - if( close( fda[1] ) != 0 ) - { show_close_error(); _exit( 2 ); } + if( close( fda[1] ) != 0 ) { show_close_error(); _exit( 2 ); } _exit( 0 ); } if( pid < 0 ) // parent @@ -232,8 +231,7 @@ bool set_data_feeder( const std::string & filename, int * const infdp, if( close( fda[0] ) != 0 || !feed_data( filename, old_infd, fda[1], magic_data, magic_size ) ) _exit( 2 ); - if( close( fda[1] ) != 0 ) - { show_close_error(); _exit( 2 ); } + if( close( fda[1] ) != 0 ) { show_close_error(); _exit( 2 ); } _exit( 0 ); } if( pid < 0 ) // parent @@ -246,7 +244,7 @@ bool set_data_feeder( const std::string & filename, int * const infdp, } -// Return format index, or -1 if uncompressed. +// Return format_index, or -1 if uncompressed. // int test_format( const int infd, uint8_t magic_data[], int * const magic_sizep ) @@ -2,8 +2,9 @@ # Runtime Configuration file for Zutils # # Zutils looks for this file in: -# 1 - $HOME/.zutilsrc -# 2 - ${sysconfdir}/zutilsrc +# 1 - $XDG_CONFIG_HOME/zutils.conf +# 2 - ${sysconfdir}/zutils.conf +# XDG_CONFIG_HOME defaults to $HOME/.config # This file sets the compressor and options to be used for each format. # The command line options override compressors specified in this file. @@ -15,6 +15,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ +inline const char * name_or_stdin( const char * const name ) + { return ( name[0] == '-' && name[1] == 0 ) ? 
"(stdin)" : name; } + int readblock( const int fd, uint8_t * const buf, const int size ); int writeblock( const int fd, const uint8_t * const buf, const int size ); bool feed_data( const std::string & filename, const int infd, const int outfd, @@ -31,7 +34,7 @@ bool set_data_feeder( const std::string & filename, int * const infdp, enum { magic_buf_size = 10 }; // >= longest extended magic (bzip2) -// Return format index, or -1 if uncompressed. +// Return format_index, or -1 if uncompressed. // int test_format( const int infd, uint8_t magic_data[], int * const magic_sizep ); |