diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-02-03 11:13:22 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-02-03 11:13:22 +0000 |
commit | 7ad294387df656cf31b2ad699012fc7c9b71d416 (patch) | |
tree | 0c9496286128b91ad40d65570954491777baf3e4 | |
parent | Adding upstream version 1.10. (diff) | |
download | zutils-7ad294387df656cf31b2ad699012fc7c9b71d416.tar.xz zutils-7ad294387df656cf31b2ad699012fc7c9b71d416.zip |
Adding upstream version 1.11.upstream/1.11
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r-- | ChangeLog | 22 | ||||
-rw-r--r-- | INSTALL | 7 | ||||
-rw-r--r-- | Makefile.in | 26 | ||||
-rw-r--r-- | NEWS | 33 | ||||
-rw-r--r-- | README | 13 | ||||
-rw-r--r-- | arg_parser.cc | 15 | ||||
-rw-r--r-- | arg_parser.h | 23 | ||||
-rwxr-xr-x | configure | 6 | ||||
-rw-r--r-- | doc/zcat.1 | 23 | ||||
-rw-r--r-- | doc/zcmp.1 | 25 | ||||
-rw-r--r-- | doc/zdiff.1 | 30 | ||||
-rw-r--r-- | doc/zgrep.1 | 29 | ||||
-rw-r--r-- | doc/ztest.1 | 23 | ||||
-rw-r--r-- | doc/zupdate.1 | 29 | ||||
-rw-r--r-- | doc/zutils.info | 154 | ||||
-rw-r--r-- | doc/zutils.texi | 155 | ||||
-rw-r--r-- | rc.cc | 47 | ||||
-rw-r--r-- | rc.h | 19 | ||||
-rw-r--r-- | recursive.cc | 10 | ||||
-rwxr-xr-x | testsuite/check.sh | 16 | ||||
-rw-r--r-- | zcat.cc | 32 | ||||
-rw-r--r-- | zcatgrep.cc | 2 | ||||
-rw-r--r-- | zcmp.cc | 121 | ||||
-rw-r--r-- | zcmpdiff.cc | 9 | ||||
-rw-r--r-- | zdiff.cc | 57 | ||||
-rw-r--r-- | zgrep.cc | 43 | ||||
-rw-r--r-- | ztest.cc | 49 | ||||
-rw-r--r-- | zupdate.cc | 55 | ||||
-rw-r--r-- | zutils.cc | 33 | ||||
-rw-r--r-- | zutils.h | 4 | ||||
-rw-r--r-- | zutilsrc | 1 |
31 files changed, 734 insertions, 377 deletions
@@ -1,3 +1,17 @@ +2022-01-25 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.11 released. + * zcmp.cc, zdiff.cc (main): Fix race returning 1 instead of 2 when a + compressor is not found or when the wrong format is forced. + * zcmp.cc (getnum): Show option name and valid range if error. + * All tools: Show option name if error in option argument. + * Add support for zstd format to all tools. + * 'zdiff -v -V' now prints the version of the diff program used. + * 'zgrep --verbose -V' now prints the version of the grep program used. + * zutils.texi: Document recompression of read-only files by linking. + * zutils.texi: Change GNU Texinfo category to 'Compression'. + (Reported by Alfred M. Szmidt). + 2021-01-05 Antonio Diaz Diaz <antonio@gnu.org> * Version 1.10 released. @@ -37,7 +51,7 @@ * zutils.cc (good_status): Wait for killed child. * Test and document continuation or exit of zcat, zgrep, ztest, and zupdate in case of error. - * configure: Accept appending to CXXFLAGS, 'CXXFLAGS+=OPTIONS'. + * configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'. 2018-02-13 Antonio Diaz Diaz <antonio@gnu.org> @@ -147,8 +161,8 @@ 2009-10-05 Antonio Diaz Diaz <ant_diaz@teleline.es> * Version 0.6 released. - * zcat.in, zgrep.in: Remove again default compressor. Format of - data read from stdin is now automatically detected. + * zcat.in, zgrep.in: Remove again default compressor. The format of + the data read from stdin is now automatically detected. * Makefile.in: Add option '--name' to help2man invocation. 2009-10-01 Antonio Diaz Diaz <ant_diaz@teleline.es> @@ -180,7 +194,7 @@ * Version 0.1 released. -Copyright (C) 2009-2021 Antonio Diaz Diaz. +Copyright (C) 2009-2022 Antonio Diaz Diaz. This file is a collection of facts, and thus it is not copyrightable, but just in case, you have unlimited permission to copy, distribute, and @@ -1,7 +1,8 @@ Requirements ------------ -You will need a C++11 compiler. (gcc 3.3.6 or newer is recommended). 
-I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards +You will need a C++98 compiler with support for 'long long'. +(gcc 3.3.6 or newer is recommended). +I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards compliant compiler. Gcc is available at http://gcc.gnu.org. @@ -75,7 +76,7 @@ After running 'configure', you can run 'make' and 'make install' as explained above. -Copyright (C) 2009-2021 Antonio Diaz Diaz. +Copyright (C) 2009-2022 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/Makefile.in b/Makefile.in index 55a974e..d02d15d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -29,13 +29,13 @@ scripts = zegrep zfgrep all : $(programs) $(scripts) zcat : $(zcat_objs) - $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zcat_objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(zcat_objs) zcmp : $(zcmp_objs) - $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zcmp_objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(zcmp_objs) zdiff : $(zdiff_objs) - $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zdiff_objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(zdiff_objs) zegrep : zegrep.in cat $(VPATH)/zegrep.in > $@ @@ -46,13 +46,13 @@ zfgrep : zfgrep.in chmod a+x zfgrep zgrep : $(zgrep_objs) - $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zgrep_objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(zgrep_objs) ztest : $(ztest_objs) - $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(ztest_objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(ztest_objs) zupdate : $(zupdate_objs) - $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zupdate_objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(zupdate_objs) rc.o : rc.cc $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -DSYSCONFDIR=\"$(sysconfdir)\" -c -o $@ $< @@ -91,27 +91,27 @@ man : $(VPATH)/doc/zcat.1 $(VPATH)/doc/zcmp.1 $(VPATH)/doc/zdiff.1 \ $(VPATH)/doc/zcat.1 : zcat help2man -n 'decompress and concatenate files to standard output' \ -o $@ --no-info ./zcat + -o $@ --info-page=$(pkgname) 
./zcat $(VPATH)/doc/zcmp.1 : zcmp help2man -n 'decompress and compare two files byte by byte' \ - -o $@ --no-info ./zcmp + -o $@ --info-page=$(pkgname) ./zcmp $(VPATH)/doc/zdiff.1 : zdiff help2man -n 'decompress and compare two files line by line' \ - -o $@ --no-info ./zdiff + -o $@ --info-page=$(pkgname) ./zdiff $(VPATH)/doc/zgrep.1 : zgrep help2man -n 'search compressed files for a regular expression' \ - -o $@ --no-info ./zgrep + -o $@ --info-page=$(pkgname) ./zgrep $(VPATH)/doc/ztest.1 : ztest help2man -n 'verify the integrity of compressed files' \ - -o $@ --no-info ./ztest + -o $@ --info-page=$(pkgname) ./ztest $(VPATH)/doc/zupdate.1 : zupdate - help2man -n 'recompress bzip2, gzip, xz files to lzip format' \ - -o $@ --no-info ./zupdate + help2man -n 'recompress bzip2, gzip, xz, zstd files to lzip format' \ + -o $@ --info-page=$(pkgname) ./zupdate Makefile : $(VPATH)/configure $(VPATH)/Makefile.in ./config.status @@ -1,9 +1,30 @@ -Changes in version 1.10: +Changes in version 1.11: -A portability issue with Solaris 10 has been fixed. +A race has been fixed in zcmp and zdiff that sometimes made them return 1 +(files differ) instead of 2 (trouble) when a compressor is not found or when +the wrong format is forced. -It has been documented in the manual that 'zgrep -L' fails with GNU grep -versions 3.2 to 3.4 inclusive because of a wrong change reverted in GNU grep -3.5. +In case of error in an argument to a command line option, all tools now show +the name of the option. -'make check' now tests empty input files with all tools except zupdate. +In case of error in a numerical argument to a command line option, zcmp +now shows the name of the option and the range of valid values. + +Support for the zstd format has been added to all tools. This allows, among +other things, zupdating zstd files to lzip format for long-term archiving, +and using zcmp along with the unzcrash tool (from the lziprecover package) +to test zstd files. 
+ +'zdiff --verbose --version' now prints the version of the diff program used +if it supports the option '--version'. + +'zgrep --verbose --version' now prints the version of the grep program used +if it supports the option '--version'. + +It has been documented in the manual how to recompress files with zupdate +from a read-only file system to another place by first linking the files +from the destination directory and then compressing the links: +'ln -s /src/foo.gz . && zupdate foo.gz' + +The texinfo category of the manual has been changed from 'Data Compression' +to 'Compression' to match that of gzip. (Reported by Alfred M. Szmidt). @@ -11,7 +11,7 @@ programs. In particular the option '--recursive' is very efficient in those utilities supporting it. The utilities provided are zcat, zcmp, zdiff, zgrep, ztest, and zupdate. -The formats supported are bzip2, gzip, lzip, and xz. +The formats supported are bzip2, gzip, lzip, xz, and zstd. Zutils uses external compressors. The compressor to be used for each format is configurable at runtime. @@ -21,11 +21,14 @@ gzip's znew. NOTE: Bzip2 and lzip provide well-defined values of exit status, which makes them safe to use with zutils. Gzip and xz may return ambiguous warning -values, making them less reliable back ends for zutils. +values, making them less reliable back ends for zutils. Zstd currently does +not even document its exit status in its man page. FORMAT NOTE 1: The option '--format' allows the processing of a subset -of formats in recursive mode and when trying compressed file names: -'zgrep foo -r --format=bz2,lz somedir somefile.tar'. +of formats in recursive mode and when trying compressed file names. For +example, use the following command to search for the string 'foo' in +gzip and lzip files only: +'zgrep foo -r --format=gz,lz somedir somefile.tar'. 
FORMAT NOTE 2: If the option '--force-format' is given, the files are passed to the corresponding decompressor without verifying their format, @@ -37,7 +40,7 @@ been compressed. Decompressed is used to refer to data which have undergone the process of decompression. -Copyright (C) 2009-2021 Antonio Diaz Diaz. +Copyright (C) 2009-2022 Antonio Diaz Diaz. This file is free documentation: you have unlimited permission to copy, distribute, and modify it. diff --git a/arg_parser.cc b/arg_parser.cc index 2e40a13..59998ac 100644 --- a/arg_parser.cc +++ b/arg_parser.cc @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2021 Antonio Diaz Diaz. + Copyright (C) 2006-2022 Antonio Diaz Diaz. This library is free software. Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -35,9 +35,10 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a // Test all long options for either exact match or abbreviated matches. 
for( int i = 0; options[i].code != 0; ++i ) - if( options[i].name && std::strncmp( options[i].name, &opt[2], len ) == 0 ) + if( options[i].long_name && + std::strncmp( options[i].long_name, &opt[2], len ) == 0 ) { - if( std::strlen( options[i].name ) == len ) // Exact match found + if( std::strlen( options[i].long_name ) == len ) // Exact match found { index = i; exact = true; break; } else if( index < 0 ) index = i; // First nonexact match found else if( options[index].code != options[i].code || @@ -58,19 +59,19 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a } ++argind; - data.push_back( Record( options[index].code ) ); + data.push_back( Record( options[index].code, options[index].long_name ) ); if( opt[len+2] ) // '--<long_option>=<argument>' syntax { if( options[index].has_arg == no ) { - error_ = "option '--"; error_ += options[index].name; + error_ = "option '--"; error_ += options[index].long_name; error_ += "' doesn't allow an argument"; return false; } if( options[index].has_arg == yes && !opt[len+3] ) { - error_ = "option '--"; error_ += options[index].name; + error_ = "option '--"; error_ += options[index].long_name; error_ += "' requires an argument"; return false; } @@ -82,7 +83,7 @@ bool Arg_parser::parse_long_option( const char * const opt, const char * const a { if( !arg || !arg[0] ) { - error_ = "option '--"; error_ += options[index].name; + error_ = "option '--"; error_ += options[index].long_name; error_ += "' requires an argument"; return false; } diff --git a/arg_parser.h b/arg_parser.h index 5629b90..e854838 100644 --- a/arg_parser.h +++ b/arg_parser.h @@ -1,5 +1,5 @@ /* Arg_parser - POSIX/GNU command line argument parser. (C++ version) - Copyright (C) 2006-2021 Antonio Diaz Diaz. + Copyright (C) 2006-2022 Antonio Diaz Diaz. This library is free software. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided @@ -23,9 +23,9 @@ In case of error, 'error' returns a non-empty error message. 'options' is an array of 'struct Option' terminated by an element - containing a code which is zero. A null name means a short-only - option. A code value outside the unsigned char range means a - long-only option. + containing a code which is zero. A null long_name means a short-only + option. A code value outside the unsigned char range means a long-only + option. Arg_parser normally makes it appear as if all the option arguments were specified before all the non-option arguments for the purposes @@ -48,7 +48,7 @@ public: struct Option { int code; // Short option letter or code ( code != 0 ) - const char * name; // Long option name (maybe null) + const char * long_name; // Long option name (maybe null) Has_arg has_arg; }; @@ -56,8 +56,12 @@ private: struct Record { int code; + std::string parsed_name; std::string argument; - explicit Record( const int c ) : code( c ) {} + explicit Record( const unsigned char c ) + : code( c ), parsed_name( "-" ) { parsed_name += c; } + Record( const int c, const char * const long_name ) + : code( c ), parsed_name( "--" ) { parsed_name += long_name; } explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {} }; @@ -91,6 +95,13 @@ public: else return 0; } + // Full name of the option parsed (short or long). + const std::string & parsed_name( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].parsed_name; + else return empty_arg; + } + const std::string & argument( const int i ) const { if( i >= 0 && i < arguments() ) return data[i].argument; @@ -1,12 +1,12 @@ #! /bin/sh # configure script for Zutils - Utilities dealing with compressed files -# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# Copyright (C) 2009-2022 Antonio Diaz Diaz. 
# # This configure script is free software: you have unlimited permission # to copy, distribute, and modify it. pkgname=zutils -pkgversion=1.10 +pkgversion=1.11 srctrigger=doc/${pkgname}.texi # clear some things potentially inherited from environment. @@ -179,7 +179,7 @@ echo "GREP = ${GREP}" rm -f Makefile cat > Makefile << EOF # Makefile for Zutils - Utilities dealing with compressed files -# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# Copyright (C) 2009-2022 Antonio Diaz Diaz. # This file was generated automatically by configure. Don't edit. # # This Makefile is free software: you have unlimited permission @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH ZCAT "1" "January 2021" "zutils 1.10" "User Commands" +.TH ZCAT "1" "January 2022" "zutils 1.11" "User Commands" .SH NAME zcat \- decompress and concatenate files to standard output .SH SYNOPSIS @@ -21,7 +21,7 @@ same compressed format. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches read standard input. .PP -The formats supported are bzip2, gzip, lzip, and xz. +The formats supported are bzip2, gzip, lzip, xz, and zstd. .PP Exit status is 0 if no errors occurred, 1 otherwise. .SH OPTIONS @@ -54,7 +54,7 @@ number all output lines don't read runtime configuration file .TP \fB\-O\fR, \fB\-\-force\-format=\fR<fmt> -force the format given (bz2, gz, lz, xz) +force the format given (bz2, gz, lz, xz, zst) .TP \fB\-q\fR, \fB\-\-quiet\fR suppress all messages @@ -91,13 +91,28 @@ set compressor and options for lzip format .TP \fB\-\-xz=\fR<command> set compressor and options for xz format +.TP +\fB\-\-zst=\fR<command> +set compressor and options for zstd format .SH "REPORTING BUGS" Report bugs to zutils\-bug@nongnu.org .br Zutils home page: http://www.nongnu.org/zutils/zutils.html .SH COPYRIGHT -Copyright \(co 2021 Antonio Diaz Diaz. +Copyright \(co 2022 Antonio Diaz Diaz. 
License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B zcat +is maintained as a Texinfo manual. If the +.B info +and +.B zcat +programs are properly installed at your site, the command +.IP +.B info zutils +.PP +should give you access to the complete manual. @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH ZCMP "1" "January 2021" "zutils 1.10" "User Commands" +.TH ZCMP "1" "January 2022" "zutils 1.11" "User Commands" .SH NAME zcmp \- decompress and compare two files byte by byte .SH SYNOPSIS @@ -12,7 +12,7 @@ starting with 1. A hyphen '\-' used as a file argument means standard input. If any file given is compressed, its decompressed content is used. Compressed files are decompressed on the fly; no temporary files are created. .PP -The formats supported are bzip2, gzip, lzip, and xz. +The formats supported are bzip2, gzip, lzip, xz, and zstd. .PP zcmp compares file1 to file2. The standard input is used only if file1 or file2 refers to standard input. If file2 is omitted zcmp tries the @@ -23,7 +23,7 @@ the corresponding uncompressed file (the name of file1 with the extension removed). .IP \- If file1 is uncompressed, compares it with the decompressed -contents of file1.[lz|bz2|gz|xz] (the first one that is found). +contents of file1.[lz|bz2|gz|zst|xz] (the first one that is found). .PP Exit status is 0 if inputs are identical, 1 if different, 2 if trouble. 
.SH OPTIONS @@ -53,7 +53,7 @@ compare at most <n> bytes don't read runtime configuration file .TP \fB\-O\fR, \fB\-\-force\-format\fR=\fI\,[\/\fR<f1>][,<f2>] -force the formats given (bz2, gz, lz, xz) +force the formats given (bz2,gz,lz,xz,zst) .TP \fB\-q\fR, \fB\-\-quiet\fR suppress all messages @@ -75,6 +75,9 @@ set compressor and options for lzip format .TP \fB\-\-xz=\fR<command> set compressor and options for xz format +.TP +\fB\-\-zst=\fR<command> +set compressor and options for zstd format .PP Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... @@ -83,8 +86,20 @@ Report bugs to zutils\-bug@nongnu.org .br Zutils home page: http://www.nongnu.org/zutils/zutils.html .SH COPYRIGHT -Copyright \(co 2021 Antonio Diaz Diaz. +Copyright \(co 2022 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B zcmp +is maintained as a Texinfo manual. If the +.B info +and +.B zcmp +programs are properly installed at your site, the command +.IP +.B info zutils +.PP +should give you access to the complete manual. diff --git a/doc/zdiff.1 b/doc/zdiff.1 index 65a34b7..784ef7b 100644 --- a/doc/zdiff.1 +++ b/doc/zdiff.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH ZDIFF "1" "January 2021" "zutils 1.10" "User Commands" +.TH ZDIFF "1" "January 2022" "zutils 1.11" "User Commands" .SH NAME zdiff \- decompress and compare two files line by line .SH SYNOPSIS @@ -12,7 +12,9 @@ input. If any file given is compressed, its decompressed content is used. zdiff is a front end to the program diff and has the limitation that messages from diff refer to temporary file names instead of those specified. .PP -The formats supported are bzip2, gzip, lzip, and xz. 
+\&'zdiff \fB\-v\fR \fB\-V\fR' prints the version of the diff program used. +.PP +The formats supported are bzip2, gzip, lzip, xz, and zstd. .PP zdiff compares file1 to file2. The standard input is used only if file1 or file2 refers to standard input. If file2 is omitted zdiff tries the @@ -23,7 +25,7 @@ the corresponding uncompressed file (the name of file1 with the extension removed). .IP \- If file1 is uncompressed, compares it with the decompressed -contents of file1.[lz|bz2|gz|xz] (the first one that is found). +contents of file1.[lz|bz2|gz|zst|xz] (the first one that is found). .PP Exit status is 0 if inputs are identical, 1 if different, 2 if trouble. Some options only work if the diff program used supports them. @@ -66,7 +68,7 @@ process only the formats in <list> don't read runtime configuration file .TP \fB\-O\fR, \fB\-\-force\-format\fR=\fI\,[\/\fR<f1>][,<f2>] -force the formats given (bz2, gz, lz, xz) +force the formats given (bz2,gz,lz,xz,zst) .TP \fB\-p\fR, \fB\-\-show\-c\-function\fR show which C function each change is in @@ -89,6 +91,9 @@ use the unified output format \fB\-U\fR, \fB\-\-unified=\fR<n> same as \fB\-u\fR but use <n> lines of context .TP +\fB\-v\fR, \fB\-\-verbose\fR +verbose mode (for \fB\-\-version\fR) +.TP \fB\-w\fR, \fB\-\-ignore\-all\-space\fR ignore all white space .TP @@ -109,13 +114,28 @@ set compressor and options for lzip format .TP \fB\-\-xz=\fR<command> set compressor and options for xz format +.TP +\fB\-\-zst=\fR<command> +set compressor and options for zstd format .SH "REPORTING BUGS" Report bugs to zutils\-bug@nongnu.org .br Zutils home page: http://www.nongnu.org/zutils/zutils.html .SH COPYRIGHT -Copyright \(co 2021 Antonio Diaz Diaz. +Copyright \(co 2022 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. 
+.SH "SEE ALSO" +The full documentation for +.B zdiff +is maintained as a Texinfo manual. If the +.B info +and +.B zdiff +programs are properly installed at your site, the command +.IP +.B info zutils +.PP +should give you access to the complete manual. diff --git a/doc/zgrep.1 b/doc/zgrep.1 index 69ed0cd..f3177bb 100644 --- a/doc/zgrep.1 +++ b/doc/zgrep.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. -.TH ZGREP "1" "January 2021" "zutils 1.10" "User Commands" +.TH ZGREP "1" "January 2022" "zutils 1.11" "User Commands" .SH NAME zgrep \- search compressed files for a regular expression .SH SYNOPSIS @@ -22,7 +22,9 @@ compressed format. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches read standard input. .PP -The formats supported are bzip2, gzip, lzip, and xz. +\&'zgrep \fB\-\-verbose\fR \fB\-V\fR' prints the version of the grep program used. +.PP +The formats supported are bzip2, gzip, lzip, xz, and zstd. .PP Exit status is 0 if match, 1 if no match, 2 if trouble. Some options only work if the grep program used supports them. 
@@ -68,10 +70,10 @@ obtain patterns from <file> <pattern> is a set of newline\-separated strings .TP \fB\-h\fR, \fB\-\-no\-filename\fR -suppress the prefixing filename on output +suppress the prefixing file name on output .TP \fB\-H\fR, \fB\-\-with\-filename\fR -print the filename for each match +print the file name for each match .TP \fB\-i\fR, \fB\-\-ignore\-case\fR ignore case distinctions @@ -101,7 +103,7 @@ don't read runtime configuration file show only the part of a line matching <pattern> .TP \fB\-O\fR, \fB\-\-force\-format=\fR<fmt> -force the format given (bz2, gz, lz, xz) +force the format given (bz2, gz, lz, xz, zst) .TP \fB\-q\fR, \fB\-\-quiet\fR suppress all messages @@ -138,6 +140,9 @@ set compressor and options for lzip format .TP \fB\-\-xz=\fR<command> set compressor and options for xz format +.TP +\fB\-\-zst=\fR<command> +set compressor and options for zstd format .PP Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... @@ -146,8 +151,20 @@ Report bugs to zutils\-bug@nongnu.org .br Zutils home page: http://www.nongnu.org/zutils/zutils.html .SH COPYRIGHT -Copyright \(co 2021 Antonio Diaz Diaz. +Copyright \(co 2022 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B zgrep +is maintained as a Texinfo manual. If the +.B info +and +.B zgrep +programs are properly installed at your site, the command +.IP +.B info zutils +.PP +should give you access to the complete manual. diff --git a/doc/ztest.1 b/doc/ztest.1 index 45cda27..210948e 100644 --- a/doc/ztest.1 +++ b/doc/ztest.1 @@ -1,5 +1,5 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. 
-.TH ZTEST "1" "January 2021" "zutils 1.10" "User Commands" +.TH ZTEST "1" "January 2022" "zutils 1.11" "User Commands" .SH NAME ztest \- verify the integrity of compressed files .SH SYNOPSIS @@ -18,7 +18,7 @@ test when testing multiple files. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches read standard input. .PP -The formats supported are bzip2, gzip, lzip, and xz. +The formats supported are bzip2, gzip, lzip, xz, and zstd. .PP Note that error detection in the xz format is broken. First, some xz files lack integrity information. Second, not all xz decompressors can @@ -45,7 +45,7 @@ process only the formats in <list> don't read runtime configuration file .TP \fB\-O\fR, \fB\-\-force\-format=\fR<fmt> -force the format given (bz2, gz, lz, xz) +force the format given (bz2, gz, lz, xz, zst) .TP \fB\-q\fR, \fB\-\-quiet\fR suppress all messages @@ -70,13 +70,28 @@ set compressor and options for lzip format .TP \fB\-\-xz=\fR<command> set compressor and options for xz format +.TP +\fB\-\-zst=\fR<command> +set compressor and options for zstd format .SH "REPORTING BUGS" Report bugs to zutils\-bug@nongnu.org .br Zutils home page: http://www.nongnu.org/zutils/zutils.html .SH COPYRIGHT -Copyright \(co 2021 Antonio Diaz Diaz. +Copyright \(co 2022 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B ztest +is maintained as a Texinfo manual. If the +.B info +and +.B ztest +programs are properly installed at your site, the command +.IP +.B info zutils +.PP +should give you access to the complete manual. diff --git a/doc/zupdate.1 b/doc/zupdate.1 index dcd3d24..b1e6472 100644 --- a/doc/zupdate.1 +++ b/doc/zupdate.1 @@ -1,12 +1,12 @@ .\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. 
-.TH ZUPDATE "1" "January 2021" "zutils 1.10" "User Commands" +.TH ZUPDATE "1" "January 2022" "zutils 1.11" "User Commands" .SH NAME -zupdate \- recompress bzip2, gzip, xz files to lzip format +zupdate \- recompress bzip2, gzip, xz, zstd files to lzip format .SH SYNOPSIS .B zupdate [\fI\,options\/\fR] [\fI\,files\/\fR] .SH DESCRIPTION -zupdate recompresses files from bzip2, gzip, and xz formats to lzip +zupdate recompresses files from bzip2, gzip, xz, and zstd formats to lzip format. Each original is compared with the new file and then deleted. Only regular files with standard file name extensions are recompressed, other files are ignored. Compressed files are decompressed and then @@ -25,8 +25,10 @@ to be safe and not cause any data loss. Therefore, existing lzip compressed files are never overwritten nor deleted. .PP The names of the original files must have one of the following extensions: -\&'.bz2', '.gz', or '.xz', which are recompressed to '.lz'; -\&'.tbz', '.tbz2', '.tgz', or '.txz', which are recompressed to '.tlz'. +.PP +\&'.bz2', '.gz', '.xz', or '.zst', which are recompressed to '.lz'. +.PP +\&'.tbz', '.tbz2', '.tgz', '.txz', or '.tzst', which are recompressed to '.tlz'. .PP Exit status is 0 if all the compressed files were successfully recompressed (if needed), compared, and deleted (if requested). Non\-zero otherwise. @@ -79,13 +81,28 @@ set compressor and options for lzip format .TP \fB\-\-xz=\fR<command> set compressor and options for xz format +.TP +\fB\-\-zst=\fR<command> +set compressor and options for zstd format .SH "REPORTING BUGS" Report bugs to zutils\-bug@nongnu.org .br Zutils home page: http://www.nongnu.org/zutils/zutils.html .SH COPYRIGHT -Copyright \(co 2021 Antonio Diaz Diaz. +Copyright \(co 2022 Antonio Diaz Diaz. License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> .br This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. 
+.SH "SEE ALSO" +The full documentation for +.B zupdate +is maintained as a Texinfo manual. If the +.B info +and +.B zupdate +programs are properly installed at your site, the command +.IP +.B info zutils +.PP +should give you access to the complete manual. diff --git a/doc/zutils.info b/doc/zutils.info index 854100f..a75b183 100644 --- a/doc/zutils.info +++ b/doc/zutils.info @@ -1,6 +1,6 @@ This is zutils.info, produced by makeinfo version 4.13+ from zutils.texi. -INFO-DIR-SECTION Data Compression +INFO-DIR-SECTION Compression START-INFO-DIR-ENTRY * Zutils: (zutils). Utilities dealing with compressed files END-INFO-DIR-ENTRY @@ -11,7 +11,7 @@ File: zutils.info, Node: Top, Next: Introduction, Up: (dir) Zutils Manual ************* -This manual is for Zutils (version 1.10, 5 January 2021). +This manual is for Zutils (version 1.11, 25 January 2022). * Menu: @@ -28,7 +28,7 @@ This manual is for Zutils (version 1.10, 5 January 2021). * Concept index:: Index of concepts - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -50,7 +50,7 @@ programs. In particular the option '--recursive' is very efficient in those utilities supporting it. The utilities provided are zcat, zcmp, zdiff, zgrep, ztest, and zupdate. -The formats supported are bzip2, gzip, lzip, and xz. +The formats supported are bzip2, gzip, lzip, xz, and zstd. Zutils uses external compressors. The compressor to be used for each format is configurable at runtime. @@ -60,12 +60,14 @@ to gzip's znew. NOTE: Bzip2 and lzip provide well-defined values of exit status, which makes them safe to use with zutils. Gzip and xz may return ambiguous warning -values, making them less reliable back ends for zutils. *Note +values, making them less reliable back ends for zutils. Zstd currently does +not even document its exit status in its man page. *Note compressor-requirements::. 
FORMAT NOTE 1: The option '--format' allows the processing of a subset -of formats in recursive mode and when trying compressed file names: -'zgrep foo -r --format=bz2,lz somedir somefile.tar'. +of formats in recursive mode and when trying compressed file names. For +example, use the following command to search for the string 'foo' in gzip +and lzip files only: 'zgrep foo -r --format=gz,lz somedir somefile.tar'. FORMAT NOTE 2: If the option '--force-format' is given, the files are passed to the corresponding decompressor without verifying their format, @@ -110,12 +112,14 @@ here. *Note Argument syntax: (arg_parser)Argument syntax. '-V' '--version' Print the version number on the standard output and exit. This version - number should be included in all bug reports. + number should be included in all bug reports. In verbose mode, zdiff + and zgrep print also the version of the diff or grep program used + respectively. '-M FORMAT_LIST' '--format=FORMAT_LIST' Process only the formats listed in the comma-separated FORMAT_LIST. - Valid formats are 'bz2', 'gz', 'lz', 'xz', and 'un' for + Valid formats are 'bz2', 'gz', 'lz', 'xz', 'zst', and 'un' for 'uncompressed', meaning "any file name without a known extension". This option excludes files based on extension, instead of format, because it is more efficient. The exclusion only applies to names @@ -130,6 +134,7 @@ here. *Note Argument syntax: (arg_parser)Argument syntax. gz enables .gz .tgz lz enables .lz .tlz xz enables .xz .txz + zst enables .zst .tzst un enables any other file name '-N' @@ -140,17 +145,18 @@ here. *Note Argument syntax: (arg_parser)Argument syntax. '--gz=COMMAND' '--lz=COMMAND' '--xz=COMMAND' +'--zst=COMMAND' Set program to be used as (de)compressor for the corresponding format. COMMAND may include arguments. For example '--lz='plzip --threads=2''. The program set with '--lz' is used for both compression and - decompression. The other three are used only for decompression. 
The - name of the program can't begin with '-'. These options override the - values set in 'zutilsrc'. The compression program used must meet three + decompression. The others are used only for decompression. The name of + the program can't begin with '-'. These options override the values + set in 'zutilsrc'. The compression program used must meet three requirements: - 1. When called with the option '-d', it must read compressed data - from the standard input and produce decompressed data on the - standard output. + 1. When called with the option '-d' and without file names, it must + read compressed data from the standard input and produce + decompressed data on the standard output. 2. If the option '-q' is passed to zutils, the compression program must also accept it. @@ -181,7 +187,7 @@ is fairly obvious (and there are further instructions in it): 2. Each non-comment line defines the command to be used for the corresponding format, with the syntax: <format> = <compressor> [options] - where <format> is one of 'bz2', 'gz', 'lz', or 'xz'. + where <format> is one of 'bz2', 'gz', 'lz', 'xz', or 'zst'. File: zutils.info, Node: Zcat, Next: Zcmp, Prev: The zutilsrc file, Up: Top @@ -235,9 +241,10 @@ Exit status is 0 if no errors occurred, 1 otherwise. '-O FORMAT' '--force-format=FORMAT' Force the compressed format given. Valid values for FORMAT are 'bz2', - 'gz', 'lz', and 'xz'. If this option is used, the files are passed to - the corresponding decompressor without verifying their format, and the - exact file name must be given. Other names won't be tried. + 'gz', 'lz', 'xz', and 'zst'. If this option is used, the files are + passed to the corresponding decompressor without verifying their + format, and the exact file name must be given. Other names won't be + tried. '-q' '--quiet' @@ -301,7 +308,7 @@ following: removed). - If FILE1 is uncompressed, compares it with the decompressed contents - of FILE1.[lz|bz2|gz|xz] (the first one that is found). 
+ of FILE1.[lz|bz2|gz|zst|xz] (the first one that is found). An exit status of 0 means no differences were found, 1 means some differences were found, and 2 means trouble. @@ -336,11 +343,11 @@ differences were found, and 2 means trouble. '--force-format=[FORMAT1][,FORMAT2]' Force the compressed formats given. Any of FORMAT1 or FORMAT2 may be omitted and the corresponding format will be automatically detected. - Valid values for FORMAT are 'bz2', 'gz', 'lz', and 'xz'. If at least - one format is specified with this option, the file is passed to the - corresponding decompressor without verifying its format, and the exact - file names of both FILE1 and FILE2 must be given. Other names won't be - tried. + Valid values for FORMAT are 'bz2', 'gz', 'lz', 'xz', and 'zst'. If at + least one format is specified with this option, the file is passed to + the corresponding decompressor without verifying its format, and the + exact file names of both FILE1 and FILE2 must be given. Other names + won't be tried. '-q' '-s' @@ -376,7 +383,7 @@ following: removed). - If FILE1 is uncompressed, compares it with the decompressed contents - of FILE1.[lz|bz2|gz|xz] (the first one that is found). + of FILE1.[lz|bz2|gz|zst|xz] (the first one that is found). An exit status of 0 means no differences were found, 1 means some differences were found, and 2 means trouble. @@ -419,11 +426,11 @@ program used supports them): '--force-format=[FORMAT1][,FORMAT2]' Force the compressed formats given. Any of FORMAT1 or FORMAT2 may be omitted and the corresponding format will be automatically detected. - Valid values for FORMAT are 'bz2', 'gz', 'lz', and 'xz'. If at least - one format is specified with this option, the file is passed to the - corresponding decompressor without verifying its format, and the exact - file names of both FILE1 and FILE2 must be given. Other names won't be - tried. + Valid values for FORMAT are 'bz2', 'gz', 'lz', 'xz', and 'zst'. 
If at + least one format is specified with this option, the file is passed to + the corresponding decompressor without verifying its format, and the + exact file names of both FILE1 and FILE2 must be given. Other names + won't be tried. '-p' '--show-c-function' @@ -452,6 +459,11 @@ program used supports them): '--unified=N' Same as -u but use N lines of context. +'-v' +'--verbose' + When specified before '--version', print the version of the diff + program used. + '-w' '--ignore-all-space' Ignore all white space. @@ -576,9 +588,10 @@ program used supports them): '-O FORMAT' '--force-format=FORMAT' Force the compressed format given. Valid values for FORMAT are 'bz2', - 'gz', 'lz', and 'xz'. If this option is used, the files are passed to - the corresponding decompressor without verifying their format, and the - exact file name must be given. Other names won't be tried. + 'gz', 'lz', 'xz', and 'zst'. If this option is used, the files are + passed to the corresponding decompressor without verifying their + format, and the exact file name must be given. Other names won't be + tried. '-q' '--quiet' @@ -605,7 +618,8 @@ program used supports them): Select non-matching lines. '--verbose' - Verbose mode. Show error messages. + Verbose mode. Show error messages. When specified before '--version', + print the version of the grep program used. '-w' '--word-regexp' @@ -634,6 +648,10 @@ test when testing multiple files. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches read standard input. + Bzip2, gzip, and lzip are the primary formats. Xz and zstd are optional. +If the decompressor for the xz or zstd formats is not found, the +corresponding files are ignored. + Note that error detection in the xz format is broken. First, some xz files lack integrity information. Second, not all xz decompressors can verify the integrity of all xz files. 
Third, section 2.1.1.2 'Stream Flags' @@ -654,11 +672,12 @@ compressed file is corrupt or invalid. '-O FORMAT' '--force-format=FORMAT' Force the compressed format given. Valid values for FORMAT are 'bz2', - 'gz', 'lz', and 'xz'. If this option is used, the files are passed to - the corresponding decompressor without verifying their format, and any - files in a format that the decompressor can't understand will fail. - For example, '--force-format=gz' can test gzipped (.gz) and compress'd - (.Z) files if the compressor used is GNU gzip. + 'gz', 'lz', 'xz', and 'zst'. If this option is used, the files are + passed to the corresponding decompressor without verifying their + format, and any files in a format that the decompressor can't + understand will fail. For example, '--force-format=gz' can test + gzipped (.gz) and compress'd (.Z) files if the compressor used is GNU + gzip. '-q' '--quiet' @@ -687,14 +706,14 @@ File: zutils.info, Node: Zupdate, Next: Problems, Prev: Ztest, Up: Top 9 Zupdate ********* -zupdate recompresses files from bzip2, gzip, and xz formats to lzip format. -Each original is compared with the new file and then deleted. Only regular -files with standard file name extensions are recompressed, other files are -ignored. Compressed files are decompressed and then recompressed on the fly; -no temporary files are created. If an error happens while recompressing a -file, zupdate exits immediately without recompressing the rest of the files. -The lzip format is chosen as destination because it is the most appropriate -for long-term data archiving. +zupdate recompresses files from bzip2, gzip, xz, and zstd formats to lzip +format. Each original is compared with the new file and then deleted. Only +regular files with standard file name extensions are recompressed, other +files are ignored. Compressed files are decompressed and then recompressed +on the fly; no temporary files are created. 
If an error happens while +recompressing a file, zupdate exits immediately without recompressing the +rest of the files. The lzip format is chosen as destination because it is +the most appropriate for long-term data archiving. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches do nothing. @@ -706,17 +725,26 @@ the original file is not deleted. The operation of zupdate is meant to be safe and not cause any data loss. Therefore, existing lzip compressed files are never overwritten nor deleted. + Recompressing files from a read-only file system to another place can be +done by first linking the files from the destination directory and then +compressing the links: 'ln -s /src/foo.gz . && zupdate foo.gz' + Combining the options '--force' and '--keep', as in 'zupdate -f -k *.gz', verifies that there are no differences between each pair of files in a multiformat set of files. The names of the original files must have one of the following extensions: -'.bz2', '.gz', or '.xz', which are recompressed to '.lz'; -'.tbz', '.tbz2', '.tgz', or '.txz', which are recompressed to '.tlz'. +'.bz2', '.gz', '.xz', or '.zst', which are recompressed to '.lz'; +'.tbz', '.tbz2', '.tgz', '.txz', or '.tzst', which are recompressed to +'.tlz'. Keeping the combined extensions ('.tgz' -> '.tlz') may be useful when recompressing Slackware packages, for example. + Bzip2, gzip, and lzip are the primary formats. Xz and zstd are optional. +If the decompressor for the xz or zstd formats is not found, the +corresponding files are ignored. + Recompressing a file is much like copying or moving it; therefore zupdate preserves the access and modification dates, permissions, and, when possible, ownership of the file just as 'cp -p' does. 
(If the user ID or @@ -816,19 +844,19 @@ Concept index Tag Table: -Node: Top222 -Node: Introduction1151 -Node: Common options3776 -Ref: compressor-requirements5847 -Node: The zutilsrc file6219 -Node: Zcat7180 -Node: Zcmp9743 -Node: Zdiff12233 -Node: Zgrep14973 -Node: Ztest19218 -Node: Zupdate21725 -Node: Problems25409 -Node: Concept index25943 +Node: Top217 +Node: Introduction1147 +Node: Common options3947 +Ref: compressor-requirements6181 +Node: The zutilsrc file6576 +Node: Zcat7544 +Node: Zcmp10119 +Node: Zdiff12620 +Node: Zgrep15478 +Node: Ztest19819 +Node: Zupdate22513 +Node: Problems26607 +Node: Concept index27141 End Tag Table diff --git a/doc/zutils.texi b/doc/zutils.texi index c494185..34a3128 100644 --- a/doc/zutils.texi +++ b/doc/zutils.texi @@ -6,10 +6,10 @@ @finalout @c %**end of header -@set UPDATED 5 January 2021 -@set VERSION 1.10 +@set UPDATED 25 January 2022 +@set VERSION 1.11 -@dircategory Data Compression +@dircategory Compression @direntry * Zutils: (zutils). Utilities dealing with compressed files @end direntry @@ -50,7 +50,7 @@ This manual is for Zutils (version @value{VERSION}, @value{UPDATED}). @end menu @sp 1 -Copyright @copyright{} 2009-2021 Antonio Diaz Diaz. +Copyright @copyright{} 2009-2022 Antonio Diaz Diaz. This manual is free documentation: you have unlimited permission to copy, distribute, and modify it. @@ -74,7 +74,7 @@ those utilities supporting it. @noindent The utilities provided are zcat, zcmp, zdiff, zgrep, ztest, and zupdate.@* -The formats supported are bzip2, gzip, lzip, and xz.@* +The formats supported are bzip2, gzip, lzip, xz, and zstd.@* Zutils uses external compressors. The compressor to be used for each format is configurable at runtime. @@ -84,12 +84,15 @@ gzip's znew. NOTE: Bzip2 and lzip provide well-defined values of exit status, which makes them safe to use with zutils. Gzip and xz may return ambiguous warning -values, making them less reliable back ends for zutils. 
+values, making them less reliable back ends for zutils. Zstd currently does +not even document its exit status in its man page. @xref{compressor-requirements}. FORMAT NOTE 1: The option @samp{--format} allows the processing of a subset -of formats in recursive mode and when trying compressed file names: -@w{@samp{zgrep foo -r --format=bz2,lz somedir somefile.tar}}. +of formats in recursive mode and when trying compressed file names. For +example, use the following command to search for the string @samp{foo} in +gzip and lzip files only: +@w{@samp{zgrep foo -r --format=gz,lz somedir somefile.tar}}. FORMAT NOTE 2: If the option @samp{--force-format} is given, the files are passed to the corresponding decompressor without verifying their format, @@ -141,17 +144,19 @@ only supports the @samp{--help} form of this option. @itemx --version Print the version number on the standard output and exit. This version number should be included in all bug reports. +In verbose mode, zdiff and zgrep print also the version of the diff or grep +program used respectively. @item -M @var{format_list} @itemx --format=@var{format_list} -Process only the formats listed in the comma-separated -@var{format_list}. Valid formats are @samp{bz2}, @samp{gz}, @samp{lz}, -@samp{xz}, and @samp{un} for @samp{uncompressed}, meaning "any file name -without a known extension". This option excludes files based on -extension, instead of format, because it is more efficient. The -exclusion only applies to names generated automatically (for example -when adding extensions to a file name or when operating recursively on -directories). Files given in the command line are always processed. +Process only the formats listed in the comma-separated @var{format_list}. +Valid formats are @samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, @samp{zst}, +and @samp{un} for @samp{uncompressed}, meaning "any file name without a +known extension". 
This option excludes files based on extension, instead of +format, because it is more efficient. The exclusion only applies to names +generated automatically (for example when adding extensions to a file name +or when operating recursively on directories). Files given in the command +line are always processed. Each format in @var{format_list} enables file names with the following extensions: @@ -161,6 +166,7 @@ extensions: @item gz @tab enables @tab .gz .tgz @item lz @tab enables @tab .lz .tlz @item xz @tab enables @tab .xz .txz +@item zst @tab enables @tab .zst .tzst @item un @tab enables @tab any other file name @end multitable @@ -172,19 +178,21 @@ Don't read the runtime configuration file @samp{zutilsrc}. @itemx --gz=@var{command} @itemx --lz=@var{command} @itemx --xz=@var{command} +@itemx --zst=@var{command} Set program to be used as (de)compressor for the corresponding format. @var{command} may include arguments. For example @w{@samp{--lz='plzip --threads=2'}}. The program set with @samp{--lz} is -used for both compression and decompression. The other three are used only -for decompression. The name of the program can't begin with @samp{-}. These +used for both compression and decompression. The others are used only for +decompression. The name of the program can't begin with @samp{-}. These options override the values set in @file{zutilsrc}. The compression program used must meet three requirements: @anchor{compressor-requirements} @enumerate @item -When called with the option @samp{-d}, it must read compressed data from -the standard input and produce decompressed data on the standard output. +When called with the option @samp{-d} and without file names, it must read +compressed data from the standard input and produce decompressed data on the +standard output. @item If the option @samp{-q} is passed to zutils, the compression program must also accept it. 
@@ -220,7 +228,8 @@ format, with the syntax: @example <format> = <compressor> [options] @end example -where <format> is one of @samp{bz2}, @samp{gz}, @samp{lz}, or @samp{xz}. +where <format> is one of @samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, or +@samp{zst}. @end enumerate @@ -278,10 +287,10 @@ Number all output lines, starting with 1. The line count is unlimited. @item -O @var{format} @itemx --force-format=@var{format} Force the compressed format given. Valid values for @var{format} are -@samp{bz2}, @samp{gz}, @samp{lz}, and @samp{xz}. If this option is used, -the files are passed to the corresponding decompressor without verifying -their format, and the exact file name must be given. Other names won't -be tried. +@samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, and @samp{zst}. If this option +is used, the files are passed to the corresponding decompressor without +verifying their format, and the exact file name must be given. Other names +won't be tried. @item -q @itemx --quiet @@ -350,7 +359,7 @@ the corresponding uncompressed file (the name of @var{file1} with the extension removed). @item If @var{file1} is uncompressed, compares it with the decompressed -contents of @var{file1}.[lz|bz2|gz|xz] (the first one that is found). +contents of @var{file1}.[lz|bz2|gz|zst|xz] (the first one that is found). @end itemize @noindent @@ -387,13 +396,13 @@ Compare at most @var{count} input bytes. @item -O [@var{format1}][,@var{format2}] @itemx --force-format=[@var{format1}][,@var{format2}] -Force the compressed formats given. Any of @var{format1} or -@var{format2} may be omitted and the corresponding format will be -automatically detected. Valid values for @var{format} are @samp{bz2}, -@samp{gz}, @samp{lz}, and @samp{xz}. If at least one format is specified -with this option, the file is passed to the corresponding decompressor -without verifying its format, and the exact file names of both -@var{file1} and @var{file2} must be given. Other names won't be tried. 
+Force the compressed formats given. Any of @var{format1} or @var{format2} +may be omitted and the corresponding format will be automatically detected. +Valid values for @var{format} are @samp{bz2}, @samp{gz}, @samp{lz}, +@samp{xz}, and @samp{zst}. If at least one format is specified with this +option, the file is passed to the corresponding decompressor without +verifying its format, and the exact file names of both @var{file1} and +@var{file2} must be given. Other names won't be tried. @item -q @itemx -s @@ -434,7 +443,7 @@ the corresponding uncompressed file (the name of @var{file1} with the extension removed). @item If @var{file1} is uncompressed, compares it with the decompressed -contents of @var{file1}.[lz|bz2|gz|xz] (the first one that is found). +contents of @var{file1}.[lz|bz2|gz|zst|xz] (the first one that is found). @end itemize @noindent @@ -478,13 +487,13 @@ Ignore case differences in file contents. @item -O [@var{format1}][,@var{format2}] @itemx --force-format=[@var{format1}][,@var{format2}] -Force the compressed formats given. Any of @var{format1} or -@var{format2} may be omitted and the corresponding format will be -automatically detected. Valid values for @var{format} are @samp{bz2}, -@samp{gz}, @samp{lz}, and @samp{xz}. If at least one format is specified -with this option, the file is passed to the corresponding decompressor -without verifying its format, and the exact file names of both -@var{file1} and @var{file2} must be given. Other names won't be tried. +Force the compressed formats given. Any of @var{format1} or @var{format2} +may be omitted and the corresponding format will be automatically detected. +Valid values for @var{format} are @samp{bz2}, @samp{gz}, @samp{lz}, +@samp{xz}, and @samp{zst}. If at least one format is specified with this +option, the file is passed to the corresponding decompressor without +verifying its format, and the exact file names of both @var{file1} and +@var{file2} must be given. Other names won't be tried. 
@item -p @itemx --show-c-function @@ -513,6 +522,11 @@ Use the unified output format. @itemx --unified=@var{n} Same as -u but use @var{n} lines of context. +@item -v +@itemx --verbose +When specified before @samp{--version}, print the version of the diff +program used. + @item -w @itemx --ignore-all-space Ignore all white space. @@ -644,10 +658,10 @@ Show only the part of matching lines that actually matches @var{pattern}. @item -O @var{format} @itemx --force-format=@var{format} Force the compressed format given. Valid values for @var{format} are -@samp{bz2}, @samp{gz}, @samp{lz}, and @samp{xz}. If this option is used, -the files are passed to the corresponding decompressor without verifying -their format, and the exact file name must be given. Other names won't -be tried. +@samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, and @samp{zst}. If this option +is used, the files are passed to the corresponding decompressor without +verifying their format, and the exact file name must be given. Other names +won't be tried. @item -q @itemx --quiet @@ -674,7 +688,8 @@ Suppress error messages about nonexistent or unreadable files. Select non-matching lines. @item --verbose -Verbose mode. Show error messages. +Verbose mode. Show error messages. When specified before @samp{--version}, +print the version of the grep program used. @item -w @itemx --word-regexp @@ -703,6 +718,10 @@ test when testing multiple files. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches read standard input. +Bzip2, gzip, and lzip are the primary formats. Xz and zstd are optional. If +the decompressor for the xz or zstd formats is not found, the corresponding +files are ignored. + Note that error detection in the xz format is broken. First, some xz files lack integrity information. 
Second, not all xz decompressors can @uref{http://www.nongnu.org/lzip/xz_inadequate.html#fragmented,,verify the integrity} @@ -730,11 +749,11 @@ ztest supports the following options: @item -O @var{format} @itemx --force-format=@var{format} Force the compressed format given. Valid values for @var{format} are -@samp{bz2}, @samp{gz}, @samp{lz}, and @samp{xz}. If this option is used, the -files are passed to the corresponding decompressor without verifying their -format, and any files in a format that the decompressor can't understand -will fail. For example, @samp{--force-format=gz} can test gzipped (.gz) and -compress'd (.Z) files if the compressor used is GNU gzip. +@samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, and @samp{zst}. If this option +is used, the files are passed to the corresponding decompressor without +verifying their format, and any files in a format that the decompressor +can't understand will fail. For example, @samp{--force-format=gz} can test +gzipped (.gz) and compress'd (.Z) files if the compressor used is GNU gzip. @item -q @itemx --quiet @@ -763,14 +782,14 @@ Further -v's increase the verbosity level. @chapter Zupdate @cindex zupdate -zupdate recompresses files from bzip2, gzip, and xz formats to lzip format. -Each original is compared with the new file and then deleted. Only regular -files with standard file name extensions are recompressed, other files are -ignored. Compressed files are decompressed and then recompressed on the fly; -no temporary files are created. If an error happens while recompressing a -file, zupdate exits immediately without recompressing the rest of the files. -The lzip format is chosen as destination because it is the most appropriate -for long-term data archiving. +zupdate recompresses files from bzip2, gzip, xz, and zstd formats to lzip +format. Each original is compared with the new file and then deleted. Only +regular files with standard file name extensions are recompressed, other +files are ignored. 
Compressed files are decompressed and then recompressed +on the fly; no temporary files are created. If an error happens while +recompressing a file, zupdate exits immediately without recompressing the +rest of the files. The lzip format is chosen as destination because it is +the most appropriate for long-term data archiving. If no files are specified, recursive searches examine the current working directory, and nonrecursive searches do nothing. @@ -782,21 +801,29 @@ and the original file is not deleted. The operation of zupdate is meant to be safe and not cause any data loss. Therefore, existing lzip compressed files are never overwritten nor deleted. +Recompressing files from a read-only file system to another place can be +done by first linking the files from the destination directory and then +compressing the links: @w{@samp{ln -s /src/foo.gz . && zupdate foo.gz}} + Combining the options @samp{--force} and @samp{--keep}, as in @w{@samp{zupdate -f -k *.gz}}, verifies that there are no differences between each pair of files in a multiformat set of files. The names of the original files must have one of the following extensions:@* -@samp{.bz2}, @samp{.gz}, or @samp{.xz}, which are recompressed to -@samp{.lz};@* -@samp{.tbz}, @samp{.tbz2}, @samp{.tgz}, or @samp{.txz}, which are -recompressed to @samp{.tlz}.@* +@samp{.bz2}, @samp{.gz}, @samp{.xz}, or @samp{.zst}, which are recompressed +to @samp{.lz};@* +@samp{.tbz}, @samp{.tbz2}, @samp{.tgz}, @samp{.txz}, or @samp{.tzst}, which +are recompressed to @samp{.tlz}.@* Keeping the combined extensions (@samp{.tgz} --> @samp{.tlz}) may be useful when recompressing Slackware packages, for example. +Bzip2, gzip, and lzip are the primary formats. Xz and zstd are optional. If +the decompressor for the xz or zstd formats is not found, the corresponding +files are ignored. 
+ Recompressing a file is much like copying or moving it; therefore zupdate preserves the access and modification dates, permissions, and, when -possible, ownership of the file just as @samp{cp -p} does. (If the user ID or +possible, ownership of the file just as @w{@samp{cp -p}} does. (If the user ID or the group ID can't be duplicated, the file permission bits S_ISUID and S_ISGID are cleared). @@ -1,5 +1,5 @@ /* Zutils - Utilities dealing with compressed files - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,12 +37,12 @@ int verbosity = 0; namespace { const char * const config_file_name = "zutilsrc"; -const char * const program_year = "2021"; +const char * const program_year = "2022"; std::string compressor_names[num_formats] = - { "bzip2", "gzip", "lzip", "xz" }; // default compressor names + { "bzip2", "gzip", "lzip", "xz", "zstd" }; // default compressor names -// args to compressors read from rc or from options --[bglx]z, maybe empty +// args to compressors read from rc or from options like --lz, maybe empty std::vector< std::string > compressor_args[num_formats]; // vector of enabled formats plus [num_formats] for uncompressed. @@ -60,6 +60,8 @@ const struct { const char * from; const char * to; int format_index; } { ".tlz", ".tar", fmt_lz }, { ".xz", "", fmt_xz }, { ".txz", ".tar", fmt_xz }, + { ".zst", "", fmt_zst }, + { ".tzst", ".tar", fmt_zst }, { 0, 0, -1 } }; @@ -83,7 +85,7 @@ int my_fgetc( FILE * const f ) } -// Returns the parity of escapes (backslashes) at the end of a string. +// Return the parity of escapes (backslashes) at the end of a string. bool trailing_escape( const std::string & s ) { unsigned len = s.size(); @@ -95,7 +97,7 @@ bool trailing_escape( const std::string & s ) /* Read a line discarding comments, leading whitespace, and blank lines. 
Escaped newlines are discarded. - Returns the empty string if at EOF. + Return the empty string if at EOF. */ const std::string & my_fgets( FILE * const f, int & linenum ) { @@ -186,7 +188,7 @@ bool parse_rc_line( const std::string & line, } - // Returns 0 for success, 1 for file not found, 2 for syntax error. + // Return 0 if success, 1 if file not found, 2 if syntax error. int process_rcfile( const std::string & name ) { FILE * const f = std::fopen( name.c_str(), "r" ); @@ -217,7 +219,7 @@ bool enabled_format( const int format_index ) } -void parse_format_list( const std::string & arg ) +void parse_format_list( const std::string & arg, const char * const pn ) { const std::string un( "uncompressed" ); bool error = arg.empty(); @@ -236,17 +238,22 @@ void parse_format_list( const std::string & arg ) { error = true; break; } enabled_formats[format_index] = true; } - if( error ) - { show_error( "Bad argument for option '--format'." ); std::exit( 1 ); } + if( !error ) return; + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad argument in option '%s'.\n", + program_name, pn ); + std::exit( 1 ); } -int parse_format_type( const std::string & arg ) +int parse_format_type( const std::string & arg, const char * const pn ) { for( int i = 0; i < num_formats; ++i ) if( arg == format_names[i] ) return i; - show_error( "Bad argument for option '--force-format'." 
); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad argument in option '%s'.\n", + program_name, pn ); std::exit( 1 ); } @@ -322,10 +329,24 @@ void show_help_addr() } -void show_version() +void show_version( const char * const command ) { std::printf( "%s (zutils) %s\n", program_name, PROGVERSION ); std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + if( command && verbosity >= 1 ) + { + FILE * const f = popen( command, "r" ); + if( f ) + { + char command_version[1024] = { 0 }; + const int rd = std::fread( command_version, 1, sizeof command_version, f ); + pclose( f ); + int i = 0; + while( i + 1 < rd && command_version[i] != '\n' ) ++i; + command_version[i] = 0; + if( command_version[0] ) std::printf( "Using %s\n", command_version ); + } + } std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" "This is free software: you are free to change and redistribute it.\n" "There is NO WARRANTY, to the extent permitted by law.\n" ); @@ -1,5 +1,5 @@ /* Zutils - Utilities dealing with compressed files - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,16 +15,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -enum { fmt_bz2, fmt_gz, fmt_lz, fmt_xz, num_formats }; // format_index -const char * const format_names[num_formats] = { "bz2", "gz", "lz", "xz" }; +enum { fmt_bz2, fmt_gz, fmt_lz, fmt_xz, fmt_zst, num_formats }; // format_index +const char * const format_names[num_formats] = + { "bz2", "gz", "lz", "xz", "zst" }; const char * const simple_extensions[num_formats] = - { ".bz2", ".gz", ".lz", ".xz" }; + { ".bz2", ".gz", ".lz", ".xz", ".zst" }; const int format_order[num_formats] = - { fmt_lz, fmt_bz2, fmt_gz, fmt_xz }; // search order + { fmt_lz, fmt_bz2, fmt_gz, fmt_zst, fmt_xz }; // search order bool enabled_format( const int format_index ); -void parse_format_list( const std::string & arg ); -int parse_format_type( const std::string & arg ); +void parse_format_list( const std::string & arg, const char * const pn ); +int parse_format_type( const std::string & arg, const char * const pn ); int extension_index( const std::string & name ); // -1 if unknown int extension_format( const int eindex ); // -1 if uncompressed @@ -46,7 +47,7 @@ const char * get_compressor_name( const int format_index ); const std::vector< std::string > & get_compressor_args( const int format_index ); void show_help_addr(); -void show_version(); +void show_version( const char * const command = 0 ); void show_error( const char * const msg, const int errcode = 0, const bool help = false ); void show_file_error( const char * const filename, const char * const msg, @@ -56,7 +57,7 @@ void show_close_error( const char * const prog_name = "data feeder" ); void show_exec_error( const char * const prog_name ); void show_fork_error( const char * const prog_name ); -// Returns exit status of child process 'pid', or 'eretval' in case of error. +// Return exit status of child process 'pid', or 'eretval' in case of error. 
// int wait_for_child( const pid_t pid, const char * const name, const int eretval = 2, const bool isgzxz = false ); diff --git a/recursive.cc b/recursive.cc index a69e117..c5f57bd 100644 --- a/recursive.cc +++ b/recursive.cc @@ -1,5 +1,5 @@ /* Zutils - Utilities dealing with compressed files - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,7 +15,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -/* Returns true if full_name is a regular file with an enabled extension +/* Return true if full_name is a regular file with an enabled extension or (a link to) a directory. */ bool test_full_name( const std::string & full_name, const struct stat * stp, const bool follow ) @@ -46,9 +46,9 @@ bool test_full_name( const std::string & full_name, const struct stat * stp, } -/* Returns in input_filename the next filename, or "." for stdin. - ("." was chosen because it is not a valid filename). - Sets 'error' to true if a directory fails to open. */ +/* Return in input_filename the next file name, or "." for stdin. + ("." was chosen instead of "-" because "." is not a valid file name). + Set 'error' to true if a directory fails to open. */ bool next_filename( std::list< std::string > & filenames, std::string & input_filename, bool & error, const int recursive, const bool ignore_stdin = false, diff --git a/testsuite/check.sh b/testsuite/check.sh index 6a6ef38..b97abd7 100755 --- a/testsuite/check.sh +++ b/testsuite/check.sh @@ -1,6 +1,6 @@ #! /bin/sh # check script for Zutils - Utilities dealing with compressed files -# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# Copyright (C) 2009-2022 Antonio Diaz Diaz. # # This script is free software: you have unlimited permission # to copy, distribute, and modify it. 
@@ -194,8 +194,7 @@ done "${ZCMP}" -Nq --force-format=lz in.lz [ $? = 2 ] || test_failed $LINENO "${ZCMP}" -Nq --force-format=lz in.gz in.lz -r=$? -{ [ $r = 1 ] || [ $r = 2 ] ; } || test_failed $LINENO +[ $? = 2 ] || test_failed $LINENO "${ZCMP}" -Nq -i 100BB in in [ $? = 2 ] || test_failed $LINENO "${ZCMP}" -Nq -i 100BB:100 in in @@ -206,6 +205,8 @@ r=$? [ $? = 2 ] || test_failed $LINENO "${ZCMP}" -N -q -n 100BB in in [ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq --gz=bad-gzip in.gz in.lz +[ $? = 2 ] || test_failed $LINENO "${ZCMP}" -N --bad-option in in 2> /dev/null [ $? = 2 ] || test_failed $LINENO @@ -255,11 +256,12 @@ done [ $? = 2 ] || test_failed $LINENO "${ZDIFF}" -N --bz2='-bzip2' in.bz2 2> /dev/null [ $? = 2 ] || test_failed $LINENO -"${ZDIFF}" -Nq --force-format=bz2 in.bz2 2> /dev/null +"${ZDIFF}" -N --brief --force-format=bz2 in.bz2 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --brief --force-format=,lz in.lz in.bz2 > /dev/null 2>&1 +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --brief --gz=bad-gzip in.gz in.lz > /dev/null 2>&1 [ $? = 2 ] || test_failed $LINENO -"${ZDIFF}" -N -q --force-format=,lz in.lz in.bz2 > /dev/null 2>&1 -r=$? -{ [ $r = 1 ] || [ $r = 2 ] ; } || test_failed $LINENO "${ZDIFF}" -N --bad-option 2> /dev/null [ $? = 2 ] || test_failed $LINENO @@ -1,5 +1,5 @@ /* Zcat - decompress and concatenate files to standard output - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ #include <stdint.h> #include <unistd.h> #include <sys/stat.h> -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ #include <io.h> #endif @@ -102,7 +102,7 @@ void show_help() "same compressed format.\n" "\nIf no files are specified, recursive searches examine the current\n" "working directory, and nonrecursive searches read standard input.\n" - "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" "\nUsage: zcat [options] [files]\n" "\nExit status is 0 if no errors occurred, 1 otherwise.\n" "\nOptions:\n" @@ -115,7 +115,7 @@ void show_help() " -M, --format=<list> process only the formats in <list>\n" " -n, --number number all output lines\n" " -N, --no-rcfile don't read runtime configuration file\n" - " -O, --force-format=<fmt> force the format given (bz2, gz, lz, xz)\n" + " -O, --force-format=<fmt> force the format given (bz2, gz, lz, xz, zst)\n" " -q, --quiet suppress all messages\n" " -r, --recursive operate recursively on directories\n" " -R, --dereference-recursive recursively follow symbolic links\n" @@ -127,7 +127,8 @@ void show_help() " --bz2=<command> set compressor and options for bzip2 format\n" " --gz=<command> set compressor and options for gzip format\n" " --lz=<command> set compressor and options for lzip format\n" - " --xz=<command> set compressor and options for xz format\n" ); + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" ); show_help_addr(); } @@ -256,7 +257,7 @@ bool cat( int infd, const int format_index, const std::string & input_filename, int main( const int argc, const char * const argv[] ) { - enum { verbose_opt = 256, bz2_opt, gz_opt, lz_opt, xz_opt }; + enum { verbose_opt = 256, bz2_opt, gz_opt, 
lz_opt, xz_opt, zst_opt }; int format_index = -1; int recursive = 0; // 1 = '-r', 2 = '-R' std::list< std::string > filenames; @@ -289,11 +290,12 @@ int main( const int argc, const char * const argv[] ) { 'v', "show-nonprinting", Arg_parser::no }, // cat { 'V', "version", Arg_parser::no }, { verbose_opt, "verbose", Arg_parser::no }, - { bz2_opt, "bz2", Arg_parser::yes }, - { gz_opt, "gz", Arg_parser::yes }, - { lz_opt, "lz", Arg_parser::yes }, - { xz_opt, "xz", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -306,6 +308,7 @@ int main( const int argc, const char * const argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); const std::string & arg = parser.argument( argind ); switch( code ) { @@ -321,11 +324,11 @@ int main( const int argc, const char * const argv[] ) case 'h': show_help(); return 0; case 'l': break; case 'L': break; - case 'M': parse_format_list( arg ); break; + case 'M': parse_format_list( arg, pn ); break; case 'n': if( cat_options.number_lines == 0 ) { cat_options.number_lines = 2; } break; case 'N': break; - case 'O': format_index = parse_format_type( arg ); break; + case 'O': format_index = parse_format_type( arg, pn ); break; case 'q': verbosity = -1; break; case 'r': recursive = 1; break; case 'R': recursive = 2; break; @@ -339,11 +342,12 @@ int main( const int argc, const char * const argv[] ) case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break; case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break; case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break; + case zst_opt: parse_compressor( arg, fmt_zst, 1 ); break; default : 
internal_error( "uncaught option." ); } } // end process options -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif diff --git a/zcatgrep.cc b/zcatgrep.cc index 31d54e6..ecbf359 100644 --- a/zcatgrep.cc +++ b/zcatgrep.cc @@ -1,5 +1,5 @@ /* Common code for zcat and zgrep - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1,5 +1,5 @@ /* Zcmp - decompress and compare two files byte by byte - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ #include <stdint.h> #include <unistd.h> #include <sys/stat.h> -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ #include <io.h> #endif @@ -55,7 +55,7 @@ void show_help() "starting with 1. A hyphen '-' used as a file argument means standard input.\n" "If any file given is compressed, its decompressed content is used. Compressed\n" "files are decompressed on the fly; no temporary files are created.\n" - "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" "\nUsage: zcmp [options] file1 [file2]\n" "\nzcmp compares file1 to file2. The standard input is used only if file1 or\n" "file2 refers to standard input. 
If file2 is omitted zcmp tries the\n" @@ -64,7 +64,7 @@ void show_help() " the corresponding uncompressed file (the name of file1 with the\n" " extension removed).\n" "\n - If file1 is uncompressed, compares it with the decompressed\n" - " contents of file1.[lz|bz2|gz|xz] (the first one that is found).\n" + " contents of file1.[lz|bz2|gz|zst|xz] (the first one that is found).\n" "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" "\nOptions:\n" " -h, --help display this help and exit\n" @@ -75,7 +75,7 @@ void show_help() " -M, --format=<list> process only the formats in <list>\n" " -n, --bytes=<n> compare at most <n> bytes\n" " -N, --no-rcfile don't read runtime configuration file\n" - " -O, --force-format=[<f1>][,<f2>] force the formats given (bz2, gz, lz, xz)\n" + " -O, --force-format=[<f1>][,<f2>] force the formats given (bz2,gz,lz,xz,zst)\n" " -q, --quiet suppress all messages\n" " -s, --silent (same as --quiet)\n" " -v, --verbose verbose mode (same as --list)\n" @@ -83,22 +83,60 @@ void show_help() " --gz=<command> set compressor and options for gzip format\n" " --lz=<command> set compressor and options for lzip format\n" " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); show_help_addr(); } -long long getnum( const char * const ptr, const char ** const tailp = 0, +// separate large numbers >= 100_000 in groups of 3 digits using '_' +const char * format_num3( long long num ) + { + const char * const si_prefix = "kMGTPEZY"; + const char * const binary_prefix = "KMGTPEZY"; + enum { buffers = 8, bufsize = 4 * sizeof (long long) }; + static char buffer[buffers][bufsize]; // circle of static buffers for printf + static int current = 0; + + char * const buf = buffer[current++]; current %= buffers; + char * p = buf + 
bufsize - 1; // fill the buffer backwards + *p = 0; // terminator + const bool negative = num < 0; + if( negative ) num = -num; + char prefix = 0; // try binary first, then si + for( int i = 0; i < 8 && num >= 1024 && num % 1024 == 0; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( int i = 0; i < 8 && num >= 1000 && num % 1000 == 0; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + const bool split = num >= 100000; + + for( int i = 0; ; ) + { + *(--p) = num % 10 + '0'; num /= 10; if( num == 0 ) break; + if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } + } + if( negative ) *(--p) = '-'; + return p; + } + + +long long getnum( const char * const arg, const char * const option_name, + const char ** const tailp = 0, const long long llimit = 0, const long long ulimit = LLONG_MAX ) { char * tail; errno = 0; - long long result = strtoll( ptr, &tail, 0 ); - if( tail == ptr ) + long long result = strtoll( arg, &tail, 0 ); + if( tail == arg ) { - show_error( "Bad or missing numerical argument.", 0, true ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad or missing numerical argument in " + "option '%s'.\n", program_name, option_name ); std::exit( 2 ); } if( result < 0 ) errno = ERANGE; @@ -126,7 +164,9 @@ long long getnum( const char * const ptr, const char ** const tailp = 0, } if( exponent < 0 ) { - show_error( "Bad multiplier in numerical argument.", 0, true ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad multiplier in numerical argument of " + "option '%s'.\n", program_name, option_name ); std::exit( 2 ); } for( int i = 0; i < exponent; ++i ) @@ -138,7 +178,10 @@ long long getnum( const char * const ptr, const char ** const tailp = 0, if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; if( errno ) { - show_error( "Numerical argument out of limits." 
); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Numerical argument out of limits [%s,%s] " + "in option '%s'.\n", program_name, format_num3( llimit ), + format_num3( ulimit ), option_name ); std::exit( 2 ); } if( tailp ) *tailp = tail; @@ -146,16 +189,19 @@ long long getnum( const char * const ptr, const char ** const tailp = 0, } -void parse_ignore_initial( const char * const arg, long long ignore_initial[2] ) +void parse_ignore_initial( const char * const arg, const char * const pn, + long long ignore_initial[2] ) { const char * tail; - ignore_initial[0] = getnum( arg, &tail ); + ignore_initial[0] = getnum( arg, pn, &tail ); if( *tail == ':' || *tail == ',' ) - ignore_initial[1] = getnum( ++tail ); + ignore_initial[1] = getnum( ++tail, pn ); else if( *tail == 0 ) ignore_initial[1] = ignore_initial[0]; else { - show_error( "Bad separator in argument of '--ignore-initial'", 0, true ); + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Bad separator in argument of option '%s'.\n", + program_name, pn ); std::exit( 2 ); } } @@ -165,7 +211,7 @@ bool skip_ignore_initial( const long long ignore_initial, const int infd ) { if( ignore_initial > 0 ) { - enum { buffer_size = 4096 }; + const int buffer_size = 4096; long long rest = ignore_initial; uint8_t buffer[buffer_size]; while( rest > 0 ) @@ -218,7 +264,8 @@ int block_compare( const uint8_t * const buffer0, int cmp( const long long max_size, const int infd[2], - const std::string filenames[2], const bool print_bytes ) + const std::string filenames[2], bool finished[2], + const bool print_bytes ) { const int buffer_size = 4096; unsigned long long byte_number = 1; @@ -241,11 +288,11 @@ int cmp( const long long max_size, const int infd[2], { rd[i] = readblock( infd[i], buffer[i], size ); if( rd[i] != size && errno ) - { - show_file_error( filenames[i].c_str(), "Read error", errno ); - return 2; - } + { show_file_error( filenames[i].c_str(), "Read error", errno ); + return 2; } } + for( int i = 0; i < 2; ++i ) + if( 
rd[i] < size ) finished[i] = true; const int min_rd = std::min( rd[0], rd[1] ); buffer0[min_rd] = 0; // sentinels for the block compare @@ -319,7 +366,7 @@ int cmp( const long long max_size, const int infd[2], int main( const int argc, const char * const argv[] ) { - enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; // number of initial bytes ignored for each file long long ignore_initial[2] = { 0, 0 }; long long max_size = -1; // < 0 means unlimited size @@ -342,11 +389,12 @@ int main( const int argc, const char * const argv[] ) { 's', "silent", Arg_parser::no }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, - { bz2_opt, "bz2", Arg_parser::yes }, - { gz_opt, "gz", Arg_parser::yes }, - { lz_opt, "lz", Arg_parser::yes }, - { xz_opt, "xz", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -359,17 +407,18 @@ int main( const int argc, const char * const argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); const std::string & arg = parser.argument( argind ); switch( code ) { case 'b': print_bytes = true; break; case 'h': show_help(); return 0; - case 'i': parse_ignore_initial( arg.c_str(), ignore_initial ); break; + case 'i': parse_ignore_initial( arg.c_str(), pn, ignore_initial ); break; case 'l': verbosity = 1; break; - case 'M': parse_format_list( arg ); break; - case 'n': max_size = getnum( arg.c_str() ); break; + case 'M': parse_format_list( arg, pn ); break; + case 'n': max_size = getnum( arg.c_str(), pn ); break; case 'N': break; - case 'O': parse_format_types2( arg, 
format_types ); break; + case 'O': parse_format_types2( arg, pn, format_types ); break; case 'q': case 's': verbosity = -1; break; case 'v': verbosity = 1; break; @@ -378,18 +427,19 @@ int main( const int argc, const char * const argv[] ) case gz_opt: parse_compressor( arg, fmt_gz ); break; case lz_opt: parse_compressor( arg, fmt_lz ); break; case xz_opt: parse_compressor( arg, fmt_xz ); break; + case zst_opt: parse_compressor( arg, fmt_zst ); break; default : internal_error( "uncaught option." ); } } // end process options -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif if( argind >= parser.arguments() ) { show_error( "No files given.", 0, true ); return 2; } - if( argind + 2 < parser.arguments() ) + if( parser.arguments() - argind > 2 ) { show_error( "Too many files.", 0, true ); return 2; } const int files = parser.arguments() - argind; @@ -446,10 +496,11 @@ int main( const int argc, const char * const argv[] ) return 2; } - int retval = cmp( max_size, infd, filenames, print_bytes ); + bool finished[2] = { false, false }; + int retval = cmp( max_size, infd, filenames, finished, print_bytes ); for( int i = 0; i < 2; ++i ) - if( !good_status( children[i], retval == 0 && max_size < 0 ) ) retval = 2; + if( !good_status( children[i], finished[i] ) ) retval = 2; for( int i = 0; i < 2; ++i ) { diff --git a/zcmpdiff.cc b/zcmpdiff.cc index fceb8cf..5688ee2 100644 --- a/zcmpdiff.cc +++ b/zcmpdiff.cc @@ -1,5 +1,5 @@ /* Common code for zcmp and zdiff - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -50,13 +50,14 @@ int open_other_instream( std::string & name ) } -void parse_format_types2( const std::string & arg, int format_types[2] ) +void parse_format_types2( const std::string & arg, const char * const pn, + int format_types[2] ) { const unsigned i = std::min( arg.find( ',' ), arg.size() ); - if( i > 0 ) format_types[0] = parse_format_type( arg.substr( 0, i ) ); + if( i > 0 ) format_types[0] = parse_format_type( arg.substr( 0, i ), pn ); else format_types[0] = -1; if( i + 1 < arg.size() ) format_types[1] = - parse_format_type( arg.substr( i + 1 ) ); + parse_format_type( arg.substr( i + 1 ), pn ); else format_types[1] = -1; } @@ -1,5 +1,5 @@ /* Zdiff - decompress and compare two files line by line - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ #include <stdint.h> #include <unistd.h> #include <sys/stat.h> -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ #include <io.h> #endif @@ -39,7 +39,6 @@ #include "rc.h" #include "zutils.h" -// 'verbosity' is always 0 in zdiff; no --verbose or --quiet available. namespace { @@ -54,7 +53,8 @@ void show_help() "input. If any file given is compressed, its decompressed content is used.\n" "zdiff is a front end to the program diff and has the limitation that messages\n" "from diff refer to temporary file names instead of those specified.\n" - "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\n'zdiff -v -V' prints the version of the diff program used.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" "\nUsage: zdiff [options] file1 [file2]\n" "\nzdiff compares file1 to file2. 
The standard input is used only if file1 or\n" "file2 refers to standard input. If file2 is omitted zdiff tries the\n" @@ -63,7 +63,7 @@ void show_help() " the corresponding uncompressed file (the name of file1 with the\n" " extension removed).\n" "\n - If file1 is uncompressed, compares it with the decompressed\n" - " contents of file1.[lz|bz2|gz|xz] (the first one that is found).\n" + " contents of file1.[lz|bz2|gz|zst|xz] (the first one that is found).\n" "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" "Some options only work if the diff program used supports them.\n" "\nOptions:\n" @@ -79,7 +79,7 @@ void show_help() " -i, --ignore-case ignore case differences in file contents\n" " -M, --format=<list> process only the formats in <list>\n" " -N, --no-rcfile don't read runtime configuration file\n" - " -O, --force-format=[<f1>][,<f2>] force the formats given (bz2, gz, lz, xz)\n" + " -O, --force-format=[<f1>][,<f2>] force the formats given (bz2,gz,lz,xz,zst)\n" " -p, --show-c-function show which C function each change is in\n" " -q, --brief output only whether files differ\n" " -s, --report-identical-files report when two files are identical\n" @@ -87,13 +87,15 @@ void show_help() " -T, --initial-tab make tabs line up by prepending a tab\n" " -u use the unified output format\n" " -U, --unified=<n> same as -u but use <n> lines of context\n" + " -v, --verbose verbose mode (for --version)\n" " -w, --ignore-all-space ignore all white space\n" " -W, --width=<n> output at most <n> print columns\n" " -y, --side-by-side output in two columns\n" " --bz2=<command> set compressor and options for bzip2 format\n" " --gz=<command> set compressor and options for gzip format\n" " --lz=<command> set compressor and options for lzip format\n" - " --xz=<command> set compressor and options for xz format\n" ); + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" ); show_help_addr(); 
} @@ -264,7 +266,7 @@ void set_signals() int main( const int argc, const char * const argv[] ) { - enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; std::vector< const char * > diff_args; // args to diff, maybe empty int format_types[2] = { -1, -1 }; program_name = "zdiff"; @@ -291,15 +293,17 @@ int main( const int argc, const char * const argv[] ) { 'T', "initial-tab", Arg_parser::no }, { 'u', 0, Arg_parser::no }, { 'U', "unified", Arg_parser::yes }, + { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, { 'w', "ignore-all-space", Arg_parser::no }, { 'W', "width", Arg_parser::yes }, { 'y', "side-by-side", Arg_parser::no }, - { bz2_opt, "bz2", Arg_parser::yes }, - { gz_opt, "gz", Arg_parser::yes }, - { lz_opt, "lz", Arg_parser::yes }, - { xz_opt, "xz", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -312,6 +316,7 @@ int main( const int argc, const char * const argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); const std::string & arg = parser.argument( argind ); switch( code ) { @@ -325,9 +330,9 @@ int main( const int argc, const char * const argv[] ) case 'E': diff_args.push_back( "-E" ); break; case 'h': show_help(); return 0; case 'i': diff_args.push_back( "-i" ); break; - case 'M': parse_format_list( arg ); break; + case 'M': parse_format_list( arg, pn ); break; case 'N': break; - case 'O': parse_format_types2( arg, format_types ); break; + case 'O': parse_format_types2( arg, pn, format_types ); break; case 'p': diff_args.push_back( "-p" ); break; case 'q': diff_args.push_back( 
"-q" ); break; case 's': diff_args.push_back( "-s" ); break; @@ -336,7 +341,8 @@ int main( const int argc, const char * const argv[] ) case 'u': diff_args.push_back( "-u" ); break; case 'U': diff_args.push_back( "-U" ); diff_args.push_back( arg.c_str() ); break; - case 'V': show_version(); return 0; + case 'v': verbosity = 1; break; + case 'V': show_version( DIFF " --version" ); return 0; case 'w': diff_args.push_back( "-w" ); break; case 'W': diff_args.push_back( "-W" ); diff_args.push_back( arg.c_str() ); break; @@ -345,18 +351,19 @@ int main( const int argc, const char * const argv[] ) case gz_opt: parse_compressor( arg, fmt_gz ); break; case lz_opt: parse_compressor( arg, fmt_lz ); break; case xz_opt: parse_compressor( arg, fmt_xz ); break; + case zst_opt: parse_compressor( arg, fmt_zst ); break; default : internal_error( "uncaught option." ); } } // end process options -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif if( argind >= parser.arguments() ) { show_error( "No files given.", 0, true ); return 2; } - if( argind + 2 < parser.arguments() ) + if( parser.arguments() - argind > 2 ) { show_error( "Too many files.", 0, true ); return 2; } const int files = parser.arguments() - argind; @@ -427,7 +434,19 @@ int main( const int argc, const char * const argv[] ) int retval = wait_for_child( diff_pid, DIFF ); for( int i = 0; i < 2; ++i ) - if( !good_status( children[i], retval == 0 ) ) retval = 2; + { + int infd; // fifo from decompressor + do infd = open( fifonames[i].c_str(), O_RDONLY | O_NONBLOCK | O_BINARY ); + while( infd < 0 && errno == EINTR ); + bool finished = false; // set to true if fifo is empty and at EOF + if( infd >= 0 ) + { + uint8_t b; + if( readblock( infd, &b, 1 ) <= 0 && errno == 0 ) finished = true; + close( infd ); + } + if( !good_status( children[i], finished ) ) retval = 2; + } for( int i = 0; i < 2; ++i ) if( filenames[i] != 
"-" && close( infd[i] ) != 0 ) @@ -1,5 +1,5 @@ /* Zgrep - search compressed files for a regular expression - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ #include <stdint.h> #include <unistd.h> #include <sys/stat.h> -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ #include <io.h> #endif @@ -60,7 +60,8 @@ void show_help() "compressed format.\n" "\nIf no files are specified, recursive searches examine the current\n" "working directory, and nonrecursive searches read standard input.\n" - "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\n'zgrep --verbose -V' prints the version of the grep program used.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" "\nUsage: zgrep [options] <pattern> [files]\n" "\nExit status is 0 if match, 1 if no match, 2 if trouble.\n" "Some options only work if the grep program used supports them.\n" @@ -78,8 +79,8 @@ void show_help() " -E, --extended-regexp <pattern> is an extended regular expression\n" " -f, --file=<file> obtain patterns from <file>\n" " -F, --fixed-strings <pattern> is a set of newline-separated strings\n" - " -h, --no-filename suppress the prefixing filename on output\n" - " -H, --with-filename print the filename for each match\n" + " -h, --no-filename suppress the prefixing file name on output\n" + " -H, --with-filename print the file name for each match\n" " -i, --ignore-case ignore case distinctions\n" " -I ignore binary files\n" " -l, --files-with-matches only print names of files containing matches\n" @@ -89,7 +90,7 @@ void show_help() " -n, --line-number print the line number of each line\n" " -N, --no-rcfile don't read runtime configuration file\n" " -o, --only-matching show only the part of a line matching <pattern>\n" - " -O, --force-format=<fmt> 
force the format given (bz2, gz, lz, xz)\n" + " -O, --force-format=<fmt> force the format given (bz2, gz, lz, xz, zst)\n" " -q, --quiet suppress all messages\n" " -r, --recursive operate recursively on directories\n" " -R, --dereference-recursive recursively follow symbolic links\n" @@ -102,6 +103,7 @@ void show_help() " --gz=<command> set compressor and options for gzip format\n" " --lz=<command> set compressor and options for lzip format\n" " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); show_help_addr(); @@ -217,7 +219,7 @@ int zgrep_file( int infd, const int format_index, int main( const int argc, const char * const argv[] ) { enum { help_opt = 256, verbose_opt, color_opt, - bz2_opt, gz_opt, lz_opt, xz_opt }; + bz2_opt, gz_opt, lz_opt, xz_opt, zst_opt }; int format_index = -1; int list_mode = 0; // 1 = list matches, -1 = list non-matches int recursive = 0; // 1 = '-r', 2 = '-R' @@ -264,11 +266,12 @@ int main( const int argc, const char * const argv[] ) { help_opt, "help", Arg_parser::no }, { verbose_opt, "verbose", Arg_parser::no }, { color_opt, "color", Arg_parser::maybe }, - { bz2_opt, "bz2", Arg_parser::yes }, - { gz_opt, "gz", Arg_parser::yes }, - { lz_opt, "lz", Arg_parser::yes }, - { xz_opt, "xz", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -282,6 +285,7 @@ int main( const int argc, const char * const argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options + const char * const pn = 
parser.parsed_name( argind ).c_str(); const std::string & arg = parser.argument( argind ); switch( code ) { @@ -308,22 +312,22 @@ int main( const int argc, const char * const argv[] ) case 'L': grep_args.push_back( "-L" ); list_mode = -1; break; case 'm': grep_args.push_back( "-m" ); grep_args.push_back( arg.c_str() ); break; - case 'M': parse_format_list( arg ); break; + case 'M': parse_format_list( arg, pn ); break; case 'n': grep_args.push_back( "-n" ); break; case 'N': break; case 'o': grep_args.push_back( "-o" ); break; - case 'O': format_index = parse_format_type( arg ); break; + case 'O': format_index = parse_format_type( arg, pn ); break; case 'q': grep_args.push_back( "-q" ); verbosity = -1; break; case 'r': recursive = 1; break; case 'R': recursive = 2; break; case 's': grep_args.push_back( "-s" ); no_messages = true; break; case 'v': grep_args.push_back( "-v" ); break; - case 'V': show_version(); return 0; + case 'V': show_version( GREP " --version" ); return 0; case 'w': grep_args.push_back( "-w" ); break; case 'x': grep_args.push_back( "-x" ); break; - case help_opt : show_help(); return 0; - case verbose_opt: if( verbosity < 4 ) ++verbosity; - no_messages = false; break; + case help_opt: show_help(); return 0; + case verbose_opt: no_messages = false; if( verbosity < 4 ) ++verbosity; + break; case color_opt: color_option = "--color"; if( !arg.empty() ) { color_option += '='; color_option += arg; } break; @@ -331,6 +335,7 @@ int main( const int argc, const char * const argv[] ) case gz_opt: parse_compressor( arg, fmt_gz ); break; case lz_opt: parse_compressor( arg, fmt_lz ); break; case xz_opt: parse_compressor( arg, fmt_xz ); break; + case zst_opt: parse_compressor( arg, fmt_zst ); break; default : internal_error( "uncaught option." 
); } } // end process options @@ -338,7 +343,7 @@ int main( const int argc, const char * const argv[] ) if( !color_option.empty() ) // push the last value set grep_args.push_back( color_option.c_str() ); -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif @@ -1,5 +1,5 @@ /* Ztest - verify the integrity of compressed files - Copyright (C) 2010-2021 Antonio Diaz Diaz. + Copyright (C) 2010-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ #include <stdint.h> #include <unistd.h> #include <sys/stat.h> -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ #include <io.h> #endif @@ -60,7 +60,7 @@ void show_help() "test when testing multiple files.\n" "\nIf no files are specified, recursive searches examine the current\n" "working directory, and nonrecursive searches read standard input.\n" - "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" "\nNote that error detection in the xz format is broken. First, some xz\n" "files lack integrity information. Second, not all xz decompressors can\n" "verify the integrity of all xz files. 
Third, section 2.1.1.2 'Stream\n" @@ -76,7 +76,7 @@ void show_help() " -V, --version output version information and exit\n" " -M, --format=<list> process only the formats in <list>\n" " -N, --no-rcfile don't read runtime configuration file\n" - " -O, --force-format=<fmt> force the format given (bz2, gz, lz, xz)\n" + " -O, --force-format=<fmt> force the format given (bz2, gz, lz, xz, zst)\n" " -q, --quiet suppress all messages\n" " -r, --recursive operate recursively on directories\n" " -R, --dereference-recursive recursively follow symbolic links\n" @@ -84,7 +84,8 @@ void show_help() " --bz2=<command> set compressor and options for bzip2 format\n" " --gz=<command> set compressor and options for gzip format\n" " --lz=<command> set compressor and options for lzip format\n" - " --xz=<command> set compressor and options for xz format\n" ); + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" ); show_help_addr(); } @@ -164,7 +165,9 @@ int ztest_file( const int infd, int format_index, const std::string & input_filename, const std::vector< const char * > & ztest_args ) { + // bzip2, gzip, and lzip are the primary formats. xz and zstd are optional. static int disable_xz = -1; // tri-state bool + static int disable_zst = -1; // tri-state bool uint8_t magic_data[magic_buf_size]; int magic_size = 0; if( format_index < 0 ) @@ -178,9 +181,24 @@ int ztest_file( const int infd, int format_index, { std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; disable_xz = ( std::system( command.c_str() ) != 0 ); + if( disable_xz && verbosity >= 2 ) + std::fprintf( stderr, "%s: '%s' not found. 
Ignoring xz files.\n", + program_name, compressor_name ); } if( disable_xz ) return 0; // ignore this file if no xz installed } + else if( format_index == fmt_zst ) + { + if( disable_zst < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_zst = ( std::system( command.c_str() ) != 0 ); + if( disable_zst && verbosity >= 2 ) + std::fprintf( stderr, "%s: '%s' not found. Ignoring zstd files.\n", + program_name, compressor_name ); + } + if( disable_zst ) return 0; // ignore this file if no zstd installed + } const pid_t pid = fork(); @@ -216,7 +234,7 @@ int ztest_file( const int infd, int format_index, int main( const int argc, const char * const argv[] ) { - enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; int format_index = -1; int recursive = 0; // 1 = '-r', 2 = '-R' std::list< std::string > filenames; @@ -235,11 +253,12 @@ int main( const int argc, const char * const argv[] ) { 'R', "dereference-recursive", Arg_parser::no }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, - { bz2_opt, "bz2", Arg_parser::yes }, - { gz_opt, "gz", Arg_parser::yes }, - { lz_opt, "lz", Arg_parser::yes }, - { xz_opt, "xz", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -252,13 +271,14 @@ int main( const int argc, const char * const argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); const std::string & arg = parser.argument( argind ); switch( code ) { case 'h': show_help(); return 0; - case 'M': parse_format_list( arg ); break; + case 'M': 
parse_format_list( arg, pn ); break; case 'N': break; - case 'O': format_index = parse_format_type( arg ); break; + case 'O': format_index = parse_format_type( arg, pn ); break; case 'q': verbosity = -1; ztest_args.push_back( "-q" ); break; case 'r': recursive = 1; break; case 'R': recursive = 2; break; @@ -269,11 +289,12 @@ int main( const int argc, const char * const argv[] ) case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break; case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break; case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break; + case zst_opt: parse_compressor( arg, fmt_zst, 1 ); break; default : internal_error( "uncaught option." ); } } // end process options -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif @@ -1,5 +1,5 @@ -/* Zupdate - recompress bzip2, gzip, xz files to lzip format - Copyright (C) 2013-2021 Antonio Diaz Diaz. +/* Zupdate - recompress bzip2, gzip, xz, zstd files to lzip format + Copyright (C) 2013-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,7 +33,7 @@ #include <utime.h> #include <sys/stat.h> #include <sys/wait.h> -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ #include <io.h> #endif @@ -51,7 +51,7 @@ namespace { void show_help() { - std::printf( "zupdate recompresses files from bzip2, gzip, and xz formats to lzip\n" + std::printf( "zupdate recompresses files from bzip2, gzip, xz, and zstd formats to lzip\n" "format. Each original is compared with the new file and then deleted.\n" "Only regular files with standard file name extensions are recompressed,\n" "other files are ignored. Compressed files are decompressed and then\n" @@ -67,8 +67,8 @@ void show_help() "to be safe and not cause any data loss. 
Therefore, existing lzip\n" "compressed files are never overwritten nor deleted.\n" "\nThe names of the original files must have one of the following extensions:\n" - "'.bz2', '.gz', or '.xz', which are recompressed to '.lz';\n" - "'.tbz', '.tbz2', '.tgz', or '.txz', which are recompressed to '.tlz'.\n" + "\n'.bz2', '.gz', '.xz', or '.zst', which are recompressed to '.lz'.\n" + "\n'.tbz', '.tbz2', '.tgz', '.txz', or '.tzst', which are recompressed to '.tlz'.\n" "\nUsage: zupdate [options] [files]\n" "\nExit status is 0 if all the compressed files were successfully recompressed\n" "(if needed), compared, and deleted (if requested). Non-zero otherwise.\n" @@ -88,7 +88,8 @@ void show_help() " --bz2=<command> set compressor and options for bzip2 format\n" " --gz=<command> set compressor and options for gzip format\n" " --lz=<command> set compressor and options for lzip format\n" - " --xz=<command> set compressor and options for xz format\n" ); + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" ); show_help_addr(); } @@ -125,17 +126,19 @@ void set_permissions( const char * const rname, const struct stat & in_stats ) t.modtime = in_stats.st_mtime; if( utime( rname, &t ) != 0 ) warning = true; if( warning && verbosity >= 2 ) - show_error( "Can't change output file attributes." ); + show_file_error( rname, "Can't change output file attributes.", errno ); } - // Returns 0 for success, -1 for file skipped, 1 for error. + // Return 0 if success, -1 if file skipped, 1 if error. int zupdate_file( const std::string & name, const char * const lzip_name, const std::vector< std::string > & lzip_args2, const bool force, const bool keep_input_files, const bool no_rcfile ) { + // bzip2, gzip, and lzip are the primary formats. xz and zstd are optional. 
static int disable_xz = -1; // tri-state bool + static int disable_zst = -1; // tri-state bool int format_index = -1; std::string rname; // recompressed name @@ -198,9 +201,24 @@ int zupdate_file( const std::string & name, const char * const lzip_name, { std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; disable_xz = ( std::system( command.c_str() ) != 0 ); + if( disable_xz && verbosity >= 2 ) + std::fprintf( stderr, "%s: '%s' not found. Ignoring xz files.\n", + program_name, compressor_name ); } if( disable_xz ) return 0; // ignore this file if no xz installed } + else if( format_index == fmt_zst ) + { + if( disable_zst < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_zst = ( std::system( command.c_str() ) != 0 ); + if( disable_zst && verbosity >= 2 ) + std::fprintf( stderr, "%s: '%s' not found. Ignoring zstd files.\n", + program_name, compressor_name ); + } + if( disable_zst ) return 0; // ignore this file if no zstd installed + } if( !lz_exists ) // recompress { @@ -307,7 +325,7 @@ int zupdate_file( const std::string & name, const char * const lzip_name, int main( const int argc, const char * const argv[] ) { - enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; int recursive = 0; // 1 = '-r', 2 = '-R' std::list< std::string > filenames; std::vector< std::string > lzip_args2; // args to lzip, maybe empty @@ -340,11 +358,12 @@ int main( const int argc, const char * const argv[] ) { 'R', "dereference-recursive", Arg_parser::no }, { 'v', "verbose", Arg_parser::no }, { 'V', "version", Arg_parser::no }, - { bz2_opt, "bz2", Arg_parser::yes }, - { gz_opt, "gz", Arg_parser::yes }, - { lz_opt, "lz", Arg_parser::yes }, - { xz_opt, "xz", Arg_parser::yes }, - { 0 , 0, Arg_parser::no } }; + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, 
"zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; const Arg_parser parser( argc, argv, options ); if( parser.error().size() ) // bad option @@ -357,6 +376,7 @@ int main( const int argc, const char * const argv[] ) { const int code = parser.code( argind ); if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); const std::string & arg = parser.argument( argind ); switch( code ) { @@ -367,7 +387,7 @@ int main( const int argc, const char * const argv[] ) case 'h': show_help(); return 0; case 'k': keep_input_files = true; break; case 'l': lzip_args2.push_back( "-v" ); break; - case 'M': parse_format_list( arg ); break; + case 'M': parse_format_list( arg, pn ); break; case 'N': no_rcfile = true; break; case 'q': verbosity = -1; lzip_args2.push_back( "-q" ); break; case 'r': recursive = 1; break; @@ -378,11 +398,12 @@ int main( const int argc, const char * const argv[] ) case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break; case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break; case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break; + case zst_opt: parse_compressor( arg, fmt_zst, 1 ); break; default : internal_error( "uncaught option." ); } } // end process options -#if defined(__MSVCRT__) || defined(__OS2__) +#if defined __MSVCRT__ || defined __OS2__ setmode( STDIN_FILENO, O_BINARY ); setmode( STDOUT_FILENO, O_BINARY ); #endif @@ -1,5 +1,5 @@ /* Zutils - Utilities dealing with compressed files - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -46,8 +46,8 @@ inline bool isvalid_ds( const uint8_t ds ) // lzip valid dictionary_size } -/* Returns -1 if child not terminated, 2 in case of error, or exit status of - child process 'pid'. +/* Return -1 if child not terminated, 2 in case of error, or exit status of + child process 'pid'. 
Return 0 if child was terminated by SIGPIPE. */ int child_status( const pid_t pid, const char * const name ) { @@ -73,8 +73,8 @@ int child_status( const pid_t pid, const char * const name ) } // end namespace -/* Returns the number of bytes really read. - If (returned value < size) and (errno == 0), means EOF was reached. +/* Return the number of bytes really read. + If (value returned < size) and (errno == 0), means EOF was reached. */ int readblock( const int fd, uint8_t * const buf, const int size ) { @@ -92,8 +92,8 @@ int readblock( const int fd, uint8_t * const buf, const int size ) } -/* Returns the number of bytes really written. - If (returned value < size), it is always an error. +/* Return the number of bytes really written. + If (value returned < size), it is always an error. */ int writeblock( const int fd, const uint8_t * const buf, const int size ) { @@ -146,7 +146,7 @@ bool good_status( const Children & children, const bool finished ) // even if compressor finished, trailing data may remain in data feeder if( i == 0 || !finished ) { - const int tmp = child_status( pid, name ); + const int tmp = child_status( pid, name ); // 0 if SIGPIPE if( tmp < 0 ) // child not terminated { kill( pid, SIGTERM ); wait_for_child( pid, name ); } else if( tmp != 0 ) error = true; // child status != 0 @@ -246,7 +246,7 @@ bool set_data_feeder( const std::string & filename, int * const infdp, } -// Returns format index or -1 if uncompressed +// Return format index, or -1 if uncompressed. 
// int test_format( const int infd, uint8_t magic_data[], int * const magic_sizep ) @@ -254,7 +254,8 @@ int test_format( const int infd, uint8_t magic_data[], enum { bzip2_magic_size = 3, gzip_magic_size = 2, lzip_magic_size = 5, - xz_magic_size = 5 }; + xz_magic_size = 5, + zstd_magic_size = 4 }; const uint8_t bzip2_magic[bzip2_magic_size] = { 0x42, 0x5A, 0x68 }; // "BZh" const uint8_t gzip_magic[gzip_magic_size] = @@ -263,19 +264,23 @@ int test_format( const int infd, uint8_t magic_data[], { 0x4C, 0x5A, 0x49, 0x50, 0x01 }; // "LZIP\001" const uint8_t xz_magic[xz_magic_size] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A }; // 0xFD, "7zXZ" + const uint8_t zstd_magic[zstd_magic_size] = + { 0x28, 0xB5, 0x2F, 0xFD }; // 0xFD2FB528 LE *magic_sizep = readblock( infd, magic_data, magic_buf_size ); - if( *magic_sizep == magic_buf_size ) + if( *magic_sizep == magic_buf_size ) // test formats in search order { + if( std::memcmp( magic_data, lzip_magic, lzip_magic_size ) == 0 && + isvalid_ds( magic_data[lzip_magic_size] ) ) + return fmt_lz; if( std::memcmp( magic_data, bzip2_magic, bzip2_magic_size ) == 0 && magic_data[3] >= '1' && magic_data[3] <= '9' && std::memcmp( magic_data + 4, "1AY&SY", 6 ) == 0 ) return fmt_bz2; if( std::memcmp( magic_data, gzip_magic, gzip_magic_size ) == 0 ) return fmt_gz; - if( std::memcmp( magic_data, lzip_magic, lzip_magic_size ) == 0 && - isvalid_ds( magic_data[lzip_magic_size] ) ) - return fmt_lz; + if( std::memcmp( magic_data, zstd_magic, zstd_magic_size ) == 0 ) + return fmt_zst; if( std::memcmp( magic_data, xz_magic, xz_magic_size ) == 0 ) return fmt_xz; } @@ -1,5 +1,5 @@ /* Zutils - Utilities dealing with compressed files - Copyright (C) 2009-2021 Antonio Diaz Diaz. + Copyright (C) 2009-2022 Antonio Diaz Diaz. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ bool set_data_feeder( const std::string & filename, int * const infdp, enum { magic_buf_size = 10 }; // >= longest extended magic (bzip2) -// Returns format index or -1 if uncompressed +// Return format index, or -1 if uncompressed. // int test_format( const int infd, uint8_t magic_data[], int * const magic_sizep ); @@ -14,3 +14,4 @@ # gz = pigz -p2 # lz = plzip -n2 # xz = pixz -p2 +# zst = zstd -T2 |