From f9be52fa859528b0439964589d03d85796275cdb Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 27 Apr 2024 15:35:06 +0200 Subject: Adding upstream version 1.10. Signed-off-by: Daniel Baumann --- AUTHORS | 1 + COPYING | 338 ++++++++++++++++++ ChangeLog | 187 ++++++++++ INSTALL | 81 +++++ Makefile.in | 238 +++++++++++++ NEWS | 9 + README | 47 +++ arg_parser.cc | 196 +++++++++++ arg_parser.h | 99 ++++++ configure | 207 +++++++++++ doc/zcat.1 | 103 ++++++ doc/zcmp.1 | 90 +++++ doc/zdiff.1 | 121 +++++++ doc/zgrep.1 | 153 ++++++++ doc/ztest.1 | 82 +++++ doc/zupdate.1 | 91 +++++ doc/zutils.info | 838 +++++++++++++++++++++++++++++++++++++++++++ doc/zutils.texi | 882 ++++++++++++++++++++++++++++++++++++++++++++++ rc.cc | 411 +++++++++++++++++++++ rc.h | 62 ++++ recursive.cc | 109 ++++++ testsuite/check.sh | 560 +++++++++++++++++++++++++++++ testsuite/test.txt | 676 +++++++++++++++++++++++++++++++++++ testsuite/test.txt.tar | Bin 0 -> 40960 bytes testsuite/test_bad_crc.lz | Bin 0 -> 7376 bytes testsuite/zcat_vs.dat | 68 ++++ testsuite/zero_bad_crc.gz | Bin 0 -> 20 bytes testsuite/zero_bad_crc.lz | Bin 0 -> 36 bytes zcat.cc | 386 ++++++++++++++++++++ zcatgrep.cc | 59 ++++ zcmp.cc | 471 +++++++++++++++++++++++++ zcmpdiff.cc | 70 ++++ zdiff.cc | 440 +++++++++++++++++++++++ zegrep.in | 3 + zfgrep.in | 3 + zgrep.cc | 401 +++++++++++++++++++++ ztest.cc | 335 ++++++++++++++++++ zupdate.cc | 412 ++++++++++++++++++++++ zutils.cc | 283 +++++++++++++++ zutils.h | 37 ++ zutilsrc | 16 + 41 files changed, 8565 insertions(+) create mode 100644 AUTHORS create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 INSTALL create mode 100644 Makefile.in create mode 100644 NEWS create mode 100644 README create mode 100644 arg_parser.cc create mode 100644 arg_parser.h create mode 100755 configure create mode 100644 doc/zcat.1 create mode 100644 doc/zcmp.1 create mode 100644 doc/zdiff.1 create mode 100644 doc/zgrep.1 create mode 100644 doc/ztest.1 create mode 100644 
doc/zupdate.1 create mode 100644 doc/zutils.info create mode 100644 doc/zutils.texi create mode 100644 rc.cc create mode 100644 rc.h create mode 100644 recursive.cc create mode 100755 testsuite/check.sh create mode 100644 testsuite/test.txt create mode 100644 testsuite/test.txt.tar create mode 100644 testsuite/test_bad_crc.lz create mode 100644 testsuite/zcat_vs.dat create mode 100644 testsuite/zero_bad_crc.gz create mode 100644 testsuite/zero_bad_crc.lz create mode 100644 zcat.cc create mode 100644 zcatgrep.cc create mode 100644 zcmp.cc create mode 100644 zcmpdiff.cc create mode 100644 zdiff.cc create mode 100644 zegrep.in create mode 100644 zfgrep.in create mode 100644 zgrep.cc create mode 100644 ztest.cc create mode 100644 zupdate.cc create mode 100644 zutils.cc create mode 100644 zutils.h create mode 100644 zutilsrc diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..f36c5a2 --- /dev/null +++ b/AUTHORS @@ -0,0 +1 @@ +The zutils were written by Antonio Diaz Diaz. diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..4ad17ae --- /dev/null +++ b/COPYING @@ -0,0 +1,338 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. 
+ + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) <year> <name of author> + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..cdbc3da --- /dev/null +++ b/ChangeLog @@ -0,0 +1,187 @@ +2021-01-05 Antonio Diaz Diaz + + * Version 1.10 released.
+ * zdiff.cc (set_fifonames): Encode pid in little endian order. + * zupdate.cc (zupdate_file): Fix a portability issue with Solaris 10. + * zutils.texi: Document that 'zgrep -L' fails with GNU grep 3.2 to 3.4. + * check.sh: Test empty input files with all tools except zupdate. + +2020-06-27 Antonio Diaz Diaz + + * Version 1.9 released. + * zcmp.cc, zdiff.cc: Read standard input only if requested. + * zdiff.cc (main): Pass options '-W' and '-y' to diff. + * zutils.cc (test_format): Detect bzip2 and lzip files better. + * ztest.cc (main): Continue testing if any input file is a terminal. + If verbosity >= 1, print number of files that failed the test. + * zcat.cc, zgrep.cc, ztest.cc (main): Check return value of close(infd). + * zutils.cc (good_status): Ignore trailing data remaining in feeder. + * zupdate.cc (zupdate_file): Support new and old lzip option '-o'. + Keep combined extensions: tgz, tbz, tbz2, txz --> tlz. + Quote file names in zcmp_command to allow file names with spaces. + * *.cc (main): Set a valid invocation_name even if argc == 0. + * zutils.texi: Improve descriptions of zcat, zcmp, and zdiff. + +2019-01-01 Antonio Diaz Diaz + + * Version 1.8 released. + * zcat.cc: Fix a buffer overflow on outbuf when '-v' is used. + * zcat.cc (cat): A canary byte has been added to outbuf. + * New option '-R, --dereference-recursive'. + * Option '-r, --recursive' now skips symlinks. + * If no files and recursive, examine current working directory. + * recursive.cc (test_full_name): Detect directory loops. + * recursive.cc: Ignore directories if not --recursive. + * recursive.cc: Remove extra trailing slashes from directory args. + * zcatgrep.cc (open_instream): Show correct errno. + * zutils.cc (good_status): Wait for killed child. + * Test and document continuation or exit of zcat, zgrep, ztest, + and zupdate in case of error. + * configure: Accept appending to CXXFLAGS, 'CXXFLAGS+=OPTIONS'. + +2018-02-13 Antonio Diaz Diaz + + * Version 1.7 released. 
+ * zgrep.cc (main): Pass option '--color' to grep. + * check.sh: Add new tests for zgrep. + +2017-04-05 Antonio Diaz Diaz + + * Version 1.6 released. + * zcmp.cc: Accept 'B' suffix in '--ignore-initial=1kB:1234B'. + * zutils.cc (feed_data): Show input file name in error messages. + +2016-05-15 Antonio Diaz Diaz + + * Version 1.5 released. + * zupdate.cc (zupdate_file): Pass '-q' to zcmp if verbosity < 0. + * zcat.cc, zgrep.cc, ztest.cc (main): Don't use stdin more than once. + * zdiff.cc (set_fifonames): Use '_' if both names are different. + * configure: Avoid warning on some shells when testing for g++. + * Makefile.in: Detect the existence of install-info. + * check.sh: A POSIX shell is required to run the tests. + +2015-05-29 Antonio Diaz Diaz + + * Version 1.4 released. + * Option '--format' has been renamed to '-O, --force-format'. + * Add new option '-M, --format=' to all utilities. + * zgrep.cc (main): Pass '-e' to grep if pattern begins with '-'. + * Makefile.in: New targets 'install*-compress'. + +2014-08-30 Antonio Diaz Diaz + + * Version 1.3 released. + * check.sh: Fix two values of expected exit status. + * zutils.texi: Document that '--format' does not verify format. + * Add two missing #includes. + * Change license to GPL version 2 or later. + +2014-02-01 Antonio Diaz Diaz + + * Version 1.2 released. + * New utility; zupdate. + * Remove zutils executable. Utils are now independent executables. + * zgrep.cc: Fix the exit status returned on error. + * zutils.texinfo: Rename to zutils.texi. + +2013-08-02 Antonio Diaz Diaz + + * Version 1.1 released. + * Add options '--bz2', '--gz', '--lz', and '--xz' to all utilities. + * Add runtime configuration file 'zutilsrc'. + * New function 'good_status' checks exit status of all children. + * Fix all uses of decompressed/uncompressed in the documentation. + +2013-05-31 Antonio Diaz Diaz + + * Version 1.0 released. + * Add new option '--format' to all utilities. 
+ * main.cc (main): Make 'grep_show_name' tri-state so that file + name is not prefixed to output by default when searching one + file and '--recursive' has not been selected. + * zgrep.cc: Fix output of option '-L' (it behaved like '-l'). + * zcmp.cc: Fix deadlock when option '-n' is used. + * zdiff.cc (set_data_feeder): Call compressor with option '-q' + only if verbosity < 0. + * zutils.cc (set_data_feeder): Likewise. + * Change quote characters in messages as advised by GNU Standards. + * configure: Options now accept a separate argument. + Rename 'datadir' to 'datarootdir'. Ignore environment variables. + * Makefile.in: New target 'install-bin'. + * Use 'setmode' instead of '_setmode' on Windows and OS/2. + * zcat.cc (Line_number): Fix a portability issue with Solaris 9. + * INSTALL: Document installing zutils along with GNU gzip. + +2011-01-11 Antonio Diaz Diaz + + * Version 0.9 released. + * configure: New options 'DIFF' and 'GREP'. + * zcmp.cc: Fix deadlock when files differ. + * zgrep.cc: Fix deadlock when binary file matches. + +2010-11-15 Antonio Diaz Diaz + + * Version 0.8 released. + * main.cc: New options '--zcat', '--zgrep', and '--ztest'. + * zcat.cc: New file implementing zcat+cat functionality in C++. + * zcmp.cc: New file implementing zcmp+cmp functionality in C++. + * doc/zcmp.1: New file. + * Remove files zcmp.in, zdiff.in. + * zdiff.cc: New file implementing zdiff functionality in C++. + * zgrep.cc: New file implementing zgrep functionality in C++. + * All mentions of zegrep and zfgrep have been removed from the + documentation because egrep and fgrep are deprecated. + * ztest.cc: New file implementing ztest functionality in C++. + * Makefile.in: Add quotes to directory names. + * check.sh: Use 'test.txt' instead of 'COPYING' for testing. + * Remove environment safeguards from configure as requested by + Richard Stallman. Now environment variables affect configure. + +2009-10-21 Antonio Diaz Diaz + + * Version 0.7 released.
+ * New utility; ztest. + * zcat.in: New option '-r, --recursive'. + +2009-10-05 Antonio Diaz Diaz + + * Version 0.6 released. + * zcat.in, zgrep.in: Remove again default compressor. Format of + data read from stdin is now automatically detected. + * Makefile.in: Add option '--name' to help2man invocation. + +2009-10-01 Antonio Diaz Diaz + + * Version 0.5 released. + * zcat.in, zgrep.in: Read again data from stdin. + * Add again default compressor for stdin only. + +2009-09-17 Antonio Diaz Diaz + + * Version 0.4 released. + * Add two new utilities; zegrep and zfgrep. + * Add zutils executable which recognizes file formats. + +2009-08-28 Antonio Diaz Diaz + + * Version 0.3 released. + * Remove default compressor. + * zcat.in, zgrep.in: Don't read data from stdin. + * Update home page and mailing list addresses. + +2009-08-13 Antonio Diaz Diaz + + * Version 0.2 released. + * Add support for xz. + +2009-08-07 Antonio Diaz Diaz + + * Version 0.1 released. + + +Copyright (C) 2009-2021 Antonio Diaz Diaz. + +This file is a collection of facts, and thus it is not copyrightable, +but just in case, you have unlimited permission to copy, distribute, and +modify it. diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..3c9c429 --- /dev/null +++ b/INSTALL @@ -0,0 +1,81 @@ +Requirements +------------ +You will need a C++11 compiler. (gcc 3.3.6 or newer is recommended). +I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards +compliant compiler. +Gcc is available at http://gcc.gnu.org. + +POSIX compliant versions of diff and grep are required for zdiff and zgrep. + +(Option -L of zgrep fails (prints wrong results, returns wrong status, and +even hangs) when using GNU grep versions 3.2 to 3.4 inclusive because of a +wrong change in the exit status of grep, which was reverted in GNU grep 3.5). + +Compressors for bzip2, gzip and lzip formats are required to run the tests. 
+ +If you are installing zutils along with GNU gzip and want to keep the +gzip scripts, the recommended method is to configure gzip as follows: + + ./configure --program-transform-name='s/^z/gz/' + +This renames, at installation time, the gzip scripts and man pages to +'gzcat', 'gzcat.1', etc, avoiding the name clashing with the programs +and man pages from zutils. + + +Procedure +--------- +1. Unpack the archive if you have not done so already: + + tar -xf zutils[version].tar.lz +or + lzip -cd zutils[version].tar.lz | tar -xf - + +This creates the directory ./zutils[version] containing the source from +the main archive. + +2. Change to zutils directory and run configure. + (Try 'configure --help' for usage instructions). + + cd zutils[version] + ./configure + +3. Run make. + + make + +4. Optionally, type 'make check' to run the tests that come with zutils. + +5. Type 'make install' to install the programs and any data files and + documentation. + + Or type 'make install-compress', which additionally compresses the + info manual and the man pages after installation. + (Installing compressed docs may become the default in the future). + + You can install only the programs, the info manual, or the man pages by + typing 'make install-bin', 'make install-info', or 'make install-man' + respectively. + + +Another way +----------- +You can also compile zutils into a separate directory. +To do this, you must use a version of 'make' that supports the variable +'VPATH', such as GNU 'make'. 'cd' to the directory where you want the +object files and executables to go and run the 'configure' script. +'configure' automatically checks for the source code in '.', in '..', and +in the directory that 'configure' is in. + +'configure' recognizes the option '--srcdir=DIR' to control where to +look for the sources. Usually 'configure' can determine that directory +automatically. + +After running 'configure', you can run 'make' and 'make install' as +explained above. 
+ + +Copyright (C) 2009-2021 Antonio Diaz Diaz. + +This file is free documentation: you have unlimited permission to copy, +distribute, and modify it. diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..55a974e --- /dev/null +++ b/Makefile.in @@ -0,0 +1,238 @@ + +DISTNAME = $(pkgname)-$(pkgversion) +INSTALL = install +INSTALL_PROGRAM = $(INSTALL) -m 755 +INSTALL_SCRIPT = $(INSTALL) -m 755 +INSTALL_DATA = $(INSTALL) -m 644 +INSTALL_DIR = $(INSTALL) -d -m 755 +SHELL = /bin/sh +CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 + +objs = arg_parser.o rc.o zutils.o \ + zcat.o zcmp.o zdiff.o zgrep.o ztest.o zupdate.o +zcat_objs = arg_parser.o rc.o zutils.o zcat.o +zcmp_objs = arg_parser.o rc.o zutils.o zcmp.o +zdiff_objs = arg_parser.o rc.o zutils.o zdiff.o +zgrep_objs = arg_parser.o rc.o zutils.o zgrep.o +ztest_objs = arg_parser.o rc.o zutils.o ztest.o +zupdate_objs = arg_parser.o rc.o zupdate.o +programs = zcat zcmp zdiff zgrep ztest zupdate +scripts = zegrep zfgrep + + +.PHONY : all install install-bin install-info install-man \ + install-strip install-compress install-strip-compress \ + install-bin-strip install-info-compress install-man-compress \ + uninstall uninstall-bin uninstall-info uninstall-man \ + doc info man check dist clean distclean + +all : $(programs) $(scripts) + +zcat : $(zcat_objs) + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zcat_objs) + +zcmp : $(zcmp_objs) + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zcmp_objs) + +zdiff : $(zdiff_objs) + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zdiff_objs) + +zegrep : zegrep.in + cat $(VPATH)/zegrep.in > $@ + chmod a+x zegrep + +zfgrep : zfgrep.in + cat $(VPATH)/zfgrep.in > $@ + chmod a+x zfgrep + +zgrep : $(zgrep_objs) + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zgrep_objs) + +ztest : $(ztest_objs) + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(ztest_objs) + +zupdate : $(zupdate_objs) + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zupdate_objs) + +rc.o : rc.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) 
-DPROGVERSION=\"$(pkgversion)\" -DSYSCONFDIR=\"$(sysconfdir)\" -c -o $@ $< + +zdiff.o : zdiff.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DDIFF=\"$(DIFF)\" -c -o $@ $< + +zgrep.o : zgrep.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DGREP=\"$(GREP)\" -c -o $@ $< + +%.o : %.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< + +$(objs) : Makefile +$(scripts) : Makefile +arg_parser.o : arg_parser.h +rc.o : arg_parser.h rc.h +zcat.o : arg_parser.h rc.h zutils.h recursive.cc zcatgrep.cc +zcmp.o : arg_parser.h rc.h zutils.h zcmpdiff.cc +zdiff.o : arg_parser.h rc.h zutils.h zcmpdiff.cc +zgrep.o : arg_parser.h rc.h zutils.h recursive.cc zcatgrep.cc +ztest.o : arg_parser.h rc.h zutils.h recursive.cc +zupdate.o : arg_parser.h rc.h recursive.cc +zutils.o : rc.h zutils.h + + +doc : info man + +info : $(VPATH)/doc/$(pkgname).info + +$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi + cd $(VPATH)/doc && makeinfo $(pkgname).texi + +man : $(VPATH)/doc/zcat.1 $(VPATH)/doc/zcmp.1 $(VPATH)/doc/zdiff.1 \ + $(VPATH)/doc/zgrep.1 $(VPATH)/doc/ztest.1 $(VPATH)/doc/zupdate.1 + +$(VPATH)/doc/zcat.1 : zcat + help2man -n 'decompress and concatenate files to standard output' \ + -o $@ --no-info ./zcat + +$(VPATH)/doc/zcmp.1 : zcmp + help2man -n 'decompress and compare two files byte by byte' \ + -o $@ --no-info ./zcmp + +$(VPATH)/doc/zdiff.1 : zdiff + help2man -n 'decompress and compare two files line by line' \ + -o $@ --no-info ./zdiff + +$(VPATH)/doc/zgrep.1 : zgrep + help2man -n 'search compressed files for a regular expression' \ + -o $@ --no-info ./zgrep + +$(VPATH)/doc/ztest.1 : ztest + help2man -n 'verify the integrity of compressed files' \ + -o $@ --no-info ./ztest + +$(VPATH)/doc/zupdate.1 : zupdate + help2man -n 'recompress bzip2, gzip, xz files to lzip format' \ + -o $@ --no-info ./zupdate + +Makefile : $(VPATH)/configure $(VPATH)/Makefile.in + ./config.status + +check : all + @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion) + +install : install-bin install-info install-man 
+install-strip : install-bin-strip install-info install-man +install-compress : install-bin install-info-compress install-man-compress +install-strip-compress : install-bin-strip install-info-compress install-man-compress + +install-bin : all + if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi + $(INSTALL_PROGRAM) ./zcat "$(DESTDIR)$(bindir)/zcat" + $(INSTALL_PROGRAM) ./zcmp "$(DESTDIR)$(bindir)/zcmp" + $(INSTALL_PROGRAM) ./zdiff "$(DESTDIR)$(bindir)/zdiff" + $(INSTALL_SCRIPT) ./zegrep "$(DESTDIR)$(bindir)/zegrep" + $(INSTALL_SCRIPT) ./zfgrep "$(DESTDIR)$(bindir)/zfgrep" + $(INSTALL_PROGRAM) ./zgrep "$(DESTDIR)$(bindir)/zgrep" + $(INSTALL_PROGRAM) ./ztest "$(DESTDIR)$(bindir)/ztest" + $(INSTALL_PROGRAM) ./zupdate "$(DESTDIR)$(bindir)/zupdate" + if [ ! -e "$(DESTDIR)$(sysconfdir)/$(pkgname)rc" ] ; then \ + if [ ! -d "$(DESTDIR)$(sysconfdir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(sysconfdir)" ; fi ; \ + $(INSTALL_DATA) $(VPATH)/$(pkgname)rc "$(DESTDIR)$(sysconfdir)/$(pkgname)rc" ; \ + fi + +install-bin-strip : all + $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin + +install-info : + if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* + $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi + +install-info-compress : install-info + lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info" + +install-man : + if [ ! 
-d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi + -rm -f "$(DESTDIR)$(mandir)/man1/zcat.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zcmp.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zdiff.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zgrep.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/ztest.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zupdate.1"* + $(INSTALL_DATA) $(VPATH)/doc/zcat.1 "$(DESTDIR)$(mandir)/man1/zcat.1" + $(INSTALL_DATA) $(VPATH)/doc/zcmp.1 "$(DESTDIR)$(mandir)/man1/zcmp.1" + $(INSTALL_DATA) $(VPATH)/doc/zdiff.1 "$(DESTDIR)$(mandir)/man1/zdiff.1" + $(INSTALL_DATA) $(VPATH)/doc/zgrep.1 "$(DESTDIR)$(mandir)/man1/zgrep.1" + $(INSTALL_DATA) $(VPATH)/doc/ztest.1 "$(DESTDIR)$(mandir)/man1/ztest.1" + $(INSTALL_DATA) $(VPATH)/doc/zupdate.1 "$(DESTDIR)$(mandir)/man1/zupdate.1" + +install-man-compress : install-man + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zcat.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zcmp.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zdiff.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zgrep.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/ztest.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zupdate.1" + +uninstall : uninstall-man uninstall-info uninstall-bin + +uninstall-bin : + -rm -f "$(DESTDIR)$(bindir)/zcat" + -rm -f "$(DESTDIR)$(bindir)/zcmp" + -rm -f "$(DESTDIR)$(bindir)/zdiff" + -rm -f "$(DESTDIR)$(bindir)/zegrep" + -rm -f "$(DESTDIR)$(bindir)/zfgrep" + -rm -f "$(DESTDIR)$(bindir)/zgrep" + -rm -f "$(DESTDIR)$(bindir)/ztest" + -rm -f "$(DESTDIR)$(bindir)/zupdate" + -rm -f "$(DESTDIR)$(sysconfdir)/$(pkgname)rc" + +uninstall-info : + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* + +uninstall-man : + -rm -f "$(DESTDIR)$(mandir)/man1/zcat.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zcmp.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zdiff.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zgrep.1"* + -rm -f 
"$(DESTDIR)$(mandir)/man1/ztest.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zupdate.1"* + +dist : doc + ln -sf $(VPATH) $(DISTNAME) + tar -Hustar --owner=root --group=root -cvf $(DISTNAME).tar \ + $(DISTNAME)/AUTHORS \ + $(DISTNAME)/COPYING \ + $(DISTNAME)/ChangeLog \ + $(DISTNAME)/INSTALL \ + $(DISTNAME)/Makefile.in \ + $(DISTNAME)/NEWS \ + $(DISTNAME)/README \ + $(DISTNAME)/configure \ + $(DISTNAME)/doc/*.1 \ + $(DISTNAME)/doc/$(pkgname).info \ + $(DISTNAME)/doc/$(pkgname).texi \ + $(DISTNAME)/$(pkgname)rc \ + $(DISTNAME)/*.h \ + $(DISTNAME)/*.cc \ + $(DISTNAME)/z*.in \ + $(DISTNAME)/testsuite/check.sh \ + $(DISTNAME)/testsuite/test.txt \ + $(DISTNAME)/testsuite/test.txt.tar \ + $(DISTNAME)/testsuite/zcat_vs.dat \ + $(DISTNAME)/testsuite/test_bad_crc.lz \ + $(DISTNAME)/testsuite/zero_bad_crc.lz \ + $(DISTNAME)/testsuite/zero_bad_crc.gz + rm -f $(DISTNAME) + lzip -v -9 $(DISTNAME).tar + +clean : + -rm -f $(programs) $(scripts) $(objs) + +distclean : clean + -rm -f Makefile config.status *.tar *.tar.lz diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..4b62179 --- /dev/null +++ b/NEWS @@ -0,0 +1,9 @@ +Changes in version 1.10: + +A portability issue with Solaris 10 has been fixed. + +It has been documented in the manual that 'zgrep -L' fails with GNU grep +versions 3.2 to 3.4 inclusive because of a wrong change reverted in GNU grep +3.5. + +'make check' now tests empty input files with all tools except zupdate. diff --git a/README b/README new file mode 100644 index 0000000..8725fe3 --- /dev/null +++ b/README @@ -0,0 +1,47 @@ +Description + +Zutils is a collection of utilities able to process any combination of +compressed and uncompressed files transparently. If any file given, +including standard input, is compressed, its decompressed content is used. +Compressed files are decompressed on the fly; no temporary files are +created. + +These utilities are not wrapper scripts but safer and more efficient C++ +programs. 
In particular the option '--recursive' is very efficient in +those utilities supporting it. + +The utilities provided are zcat, zcmp, zdiff, zgrep, ztest, and zupdate. +The formats supported are bzip2, gzip, lzip, and xz. +Zutils uses external compressors. The compressor to be used for each format +is configurable at runtime. + +zcat, zcmp, zdiff, and zgrep are improved replacements for the shell scripts +provided by GNU gzip. ztest is unique to zutils. zupdate is similar to +gzip's znew. + +NOTE: Bzip2 and lzip provide well-defined values of exit status, which makes +them safe to use with zutils. Gzip and xz may return ambiguous warning +values, making them less reliable back ends for zutils. + +FORMAT NOTE 1: The option '--format' allows the processing of a subset +of formats in recursive mode and when trying compressed file names: +'zgrep foo -r --format=bz2,lz somedir somefile.tar'. + +FORMAT NOTE 2: If the option '--force-format' is given, the files are +passed to the corresponding decompressor without verifying their format, +allowing for example the processing of compress'd (.Z) files with gzip: +'zcmp --force-format=gz file.Z file.lz'. + +LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have +been compressed. Decompressed is used to refer to data which have undergone +the process of decompression. + + +Copyright (C) 2009-2021 Antonio Diaz Diaz. + +This file is free documentation: you have unlimited permission to copy, +distribute, and modify it. + +The file Makefile.in is a data file used by configure to produce the +Makefile. It has the same copyright owner and permissions that configure +itself. diff --git a/arg_parser.cc b/arg_parser.cc new file mode 100644 index 0000000..2e40a13 --- /dev/null +++ b/arg_parser.cc @@ -0,0 +1,196 @@ +/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) + Copyright (C) 2006-2021 Antonio Diaz Diaz. + + This library is free software. 
Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#include <cstring> +#include <string> +#include <vector> + +#include "arg_parser.h" + + +bool Arg_parser::parse_long_option( const char * const opt, const char * const arg, + const Option options[], int & argind ) + { + unsigned len; + int index = -1; + bool exact = false, ambig = false; + + for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; + + // Test all long options for either exact match or abbreviated matches.
+ for( int i = 0; options[i].code != 0; ++i ) + if( options[i].name && std::strncmp( options[i].name, &opt[2], len ) == 0 ) + { + if( std::strlen( options[i].name ) == len ) // Exact match found + { index = i; exact = true; break; } + else if( index < 0 ) index = i; // First nonexact match found + else if( options[index].code != options[i].code || + options[index].has_arg != options[i].has_arg ) + ambig = true; // Second or later nonexact match found + } + + if( ambig && !exact ) + { + error_ = "option '"; error_ += opt; error_ += "' is ambiguous"; + return false; + } + + if( index < 0 ) // nothing found + { + error_ = "unrecognized option '"; error_ += opt; error_ += '\''; + return false; + } + + ++argind; + data.push_back( Record( options[index].code ) ); + + if( opt[len+2] ) // '--<long_option>=<argument>' syntax + { + if( options[index].has_arg == no ) + { + error_ = "option '--"; error_ += options[index].name; + error_ += "' doesn't allow an argument"; + return false; + } + if( options[index].has_arg == yes && !opt[len+3] ) + { + error_ = "option '--"; error_ += options[index].name; + error_ += "' requires an argument"; + return false; + } + data.back().argument = &opt[len+3]; + return true; + } + + if( options[index].has_arg == yes ) + { + if( !arg || !arg[0] ) + { + error_ = "option '--"; error_ += options[index].name; + error_ += "' requires an argument"; + return false; + } + ++argind; data.back().argument = arg; + return true; + } + + return true; + } + + +bool Arg_parser::parse_short_option( const char * const opt, const char * const arg, + const Option options[], int & argind ) + { + int cind = 1; // character index in opt + + while( cind > 0 ) + { + int index = -1; + const unsigned char c = opt[cind]; + + if( c != 0 ) + for( int i = 0; options[i].code; ++i ) + if( c == options[i].code ) + { index = i; break; } + + if( index < 0 ) + { + error_ = "invalid option -- '"; error_ += c; error_ += '\''; + return false; + } + + data.push_back( Record( c ) ); + if( opt[++cind] == 0 )
{ ++argind; cind = 0; } // opt finished + + if( options[index].has_arg != no && cind > 0 && opt[cind] ) + { + data.back().argument = &opt[cind]; ++argind; cind = 0; + } + else if( options[index].has_arg == yes ) + { + if( !arg || !arg[0] ) + { + error_ = "option requires an argument -- '"; error_ += c; + error_ += '\''; + return false; + } + data.back().argument = arg; ++argind; cind = 0; + } + } + return true; + } + + +Arg_parser::Arg_parser( const int argc, const char * const argv[], + const Option options[], const bool in_order ) + { + if( argc < 2 || !argv || !options ) return; + + std::vector< const char * > non_options; // skipped non-options + int argind = 1; // index in argv + + while( argind < argc ) + { + const unsigned char ch1 = argv[argind][0]; + const unsigned char ch2 = ch1 ? argv[argind][1] : 0; + + if( ch1 == '-' && ch2 ) // we found an option + { + const char * const opt = argv[argind]; + const char * const arg = ( argind + 1 < argc ) ? argv[argind+1] : 0; + if( ch2 == '-' ) + { + if( !argv[argind][2] ) { ++argind; break; } // we found "--" + else if( !parse_long_option( opt, arg, options, argind ) ) break; + } + else if( !parse_short_option( opt, arg, options, argind ) ) break; + } + else + { + if( in_order ) data.push_back( Record( argv[argind++] ) ); + else non_options.push_back( argv[argind++] ); + } + } + if( !error_.empty() ) data.clear(); + else + { + for( unsigned i = 0; i < non_options.size(); ++i ) + data.push_back( Record( non_options[i] ) ); + while( argind < argc ) + data.push_back( Record( argv[argind++] ) ); + } + } + + +Arg_parser::Arg_parser( const char * const opt, const char * const arg, + const Option options[] ) + { + if( !opt || !opt[0] || !options ) return; + + if( opt[0] == '-' && opt[1] ) // we found an option + { + int argind = 1; // dummy + if( opt[1] == '-' ) + { if( opt[2] ) parse_long_option( opt, arg, options, argind ); } + else + parse_short_option( opt, arg, options, argind ); + if( !error_.empty() ) data.clear(); 
+ } + else data.push_back( Record( opt ) ); + } diff --git a/arg_parser.h b/arg_parser.h new file mode 100644 index 0000000..5629b90 --- /dev/null +++ b/arg_parser.h @@ -0,0 +1,99 @@ +/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) + Copyright (C) 2006-2021 Antonio Diaz Diaz. + + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +/* Arg_parser reads the arguments in 'argv' and creates a number of + option codes, option arguments, and non-option arguments. + + In case of error, 'error' returns a non-empty error message. + + 'options' is an array of 'struct Option' terminated by an element + containing a code which is zero. A null name means a short-only + option. A code value outside the unsigned char range means a + long-only option. + + Arg_parser normally makes it appear as if all the option arguments + were specified before all the non-option arguments for the purposes + of parsing, even if the user of your program intermixed option and + non-option arguments. If you want the arguments in the exact order + the user typed them, call 'Arg_parser' with 'in_order' = true. + + The argument '--' terminates all options; any following arguments are + treated as non-option arguments, even if they begin with a hyphen. 
+ + The syntax for optional option arguments is '-<short_option><argument>' + (without whitespace), or '--<long_option>=<argument>'. +*/ + +class Arg_parser + { +public: + enum Has_arg { no, yes, maybe }; + + struct Option + { + int code; // Short option letter or code ( code != 0 ) + const char * name; // Long option name (maybe null) + Has_arg has_arg; + }; + +private: + struct Record + { + int code; + std::string argument; + explicit Record( const int c ) : code( c ) {} + explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {} + }; + + const std::string empty_arg; + std::string error_; + std::vector< Record > data; + + bool parse_long_option( const char * const opt, const char * const arg, + const Option options[], int & argind ); + bool parse_short_option( const char * const opt, const char * const arg, + const Option options[], int & argind ); + +public: + Arg_parser( const int argc, const char * const argv[], + const Option options[], const bool in_order = false ); + + // Restricted constructor. Parses a single token and argument (if any). + Arg_parser( const char * const opt, const char * const arg, + const Option options[] ); + + const std::string & error() const { return error_; } + + // The number of arguments parsed. May be different from argc. + int arguments() const { return data.size(); } + + /* If code( i ) is 0, argument( i ) is a non-option. + Else argument( i ) is the option's argument (or empty). */ + int code( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].code; + else return 0; + } + + const std::string & argument( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].argument; + else return empty_arg; + } + }; diff --git a/configure b/configure new file mode 100755 index 0000000..48c7f81 --- /dev/null +++ b/configure @@ -0,0 +1,207 @@ +#! /bin/sh +# configure script for Zutils - Utilities dealing with compressed files +# Copyright (C) 2009-2021 Antonio Diaz Diaz.
+# +# This configure script is free software: you have unlimited permission +# to copy, distribute, and modify it. + +pkgname=zutils +pkgversion=1.10 +srctrigger=doc/${pkgname}.texi + +# clear some things potentially inherited from environment. +LC_ALL=C +export LC_ALL +srcdir= +prefix=/usr/local +exec_prefix='$(prefix)' +bindir='$(exec_prefix)/bin' +datarootdir='$(prefix)/share' +infodir='$(datarootdir)/info' +mandir='$(datarootdir)/man' +sysconfdir='$(prefix)/etc' +CXX=g++ +CPPFLAGS= +CXXFLAGS='-Wall -W -O2' +LDFLAGS= +DIFF=diff +GREP=grep + +# checking whether we are using GNU C++. +/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; } + +# Loop over all args +args= +no_create= +while [ $# != 0 ] ; do + + # Get the first arg, and shuffle + option=$1 ; arg2=no + shift + + # Add the argument quoted to args + if [ -z "${args}" ] ; then args="\"${option}\"" + else args="${args} \"${option}\"" ; fi + + # Split out the argument for options that take them + case ${option} in + *=*) optarg=`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'` ;; + esac + + # Process the options + case ${option} in + --help | -h) + echo "Usage: $0 [OPTION]... [VAR=VALUE]..." + echo + echo "To assign makefile variables (e.g., CXX, CXXFLAGS...), specify them as" + echo "arguments to configure in the form VAR=VALUE." + echo + echo "Options and variables: [defaults in brackets]" + echo " -h, --help display this help and exit" + echo " -V, --version output version information and exit" + echo " --srcdir=DIR find the sources in DIR [. 
or ..]" + echo " --prefix=DIR install into DIR [${prefix}]" + echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]" + echo " --bindir=DIR user executables directory [${bindir}]" + echo " --datarootdir=DIR base directory for doc and data [${datarootdir}]" + echo " --infodir=DIR info files directory [${infodir}]" + echo " --mandir=DIR man pages directory [${mandir}]" + echo " --sysconfdir=DIR read-only single-machine data directory [${sysconfdir}]" + echo " CXX=COMPILER C++ compiler to use [${CXX}]" + echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" + echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]" + echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS" + echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" + echo " DIFF=NAME diff program to use with zdiff [${DIFF}]" + echo " GREP=NAME grep program to use with zgrep [${GREP}]" + echo + exit 0 ;; + --version | -V) + echo "Configure script for ${pkgname} version ${pkgversion}" + exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + --sysconfdir) sysconfdir=$1 ; arg2=yes ;; + + --srcdir=*) srcdir=${optarg} ;; + --prefix=*) prefix=${optarg} ;; + --exec-prefix=*) exec_prefix=${optarg} ;; + --bindir=*) bindir=${optarg} ;; + --datarootdir=*) datarootdir=${optarg} ;; + --infodir=*) infodir=${optarg} ;; + --mandir=*) mandir=${optarg} ;; + --sysconfdir=*) sysconfdir=${optarg} ;; + --no-create) no_create=yes ;; + + CXX=*) CXX=${optarg} ;; + CPPFLAGS=*) CPPFLAGS=${optarg} ;; + CXXFLAGS=*) CXXFLAGS=${optarg} ;; + CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;; + LDFLAGS=*) LDFLAGS=${optarg} ;; + DIFF=*) DIFF=${optarg} ;; + GREP=*) GREP=${optarg} ;; + + --*) + echo 
"configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; + *=* | *-*-*) ;; + *) + echo "configure: unrecognized option: '${option}'" 1>&2 + echo "Try 'configure --help' for more information." 1>&2 + exit 1 ;; + esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to '${option}'" 1>&2 + exit 1 + fi + fi +done + +# Find the source files, if location was not specified. +srcdirtext= +if [ -z "${srcdir}" ] ; then + srcdirtext="or . or .." ; srcdir=. + if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi + if [ ! -r "${srcdir}/${srctrigger}" ] ; then + ## the sed command below emulates the dirname command + srcdir=`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + fi +fi + +if [ ! -r "${srcdir}/${srctrigger}" ] ; then + echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: (At least ${srctrigger} is missing)." 1>&2 + exit 1 +fi + +# Set srcdir to . if that's what it is. +if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi + +echo +if [ -z "${no_create}" ] ; then + echo "creating config.status" + rm -f config.status + cat > config.status << EOF +#! /bin/sh +# This file was generated automatically by configure. Don't edit. +# Run this file to recreate the current configuration. +# +# This script is free software: you have unlimited permission +# to copy, distribute, and modify it. 
+ +exec /bin/sh $0 ${args} --no-create +EOF + chmod +x config.status +fi + +echo "creating Makefile" +echo "VPATH = ${srcdir}" +echo "prefix = ${prefix}" +echo "exec_prefix = ${exec_prefix}" +echo "bindir = ${bindir}" +echo "datarootdir = ${datarootdir}" +echo "infodir = ${infodir}" +echo "mandir = ${mandir}" +echo "sysconfdir = ${sysconfdir}" +echo "CXX = ${CXX}" +echo "CPPFLAGS = ${CPPFLAGS}" +echo "CXXFLAGS = ${CXXFLAGS}" +echo "LDFLAGS = ${LDFLAGS}" +echo "DIFF = ${DIFF}" +echo "GREP = ${GREP}" +rm -f Makefile +cat > Makefile << EOF +# Makefile for Zutils - Utilities dealing with compressed files +# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# This file was generated automatically by configure. Don't edit. +# +# This Makefile is free software: you have unlimited permission +# to copy, distribute, and modify it. + +pkgname = ${pkgname} +pkgversion = ${pkgversion} +VPATH = ${srcdir} +prefix = ${prefix} +exec_prefix = ${exec_prefix} +bindir = ${bindir} +datarootdir = ${datarootdir} +infodir = ${infodir} +mandir = ${mandir} +sysconfdir = ${sysconfdir} +CXX = ${CXX} +CPPFLAGS = ${CPPFLAGS} +CXXFLAGS = ${CXXFLAGS} +LDFLAGS = ${LDFLAGS} +DIFF = ${DIFF} +GREP = ${GREP} +EOF +cat "${srcdir}/Makefile.in" >> Makefile + +echo "OK. Now you can run make." diff --git a/doc/zcat.1 b/doc/zcat.1 new file mode 100644 index 0000000..fbaf821 --- /dev/null +++ b/doc/zcat.1 @@ -0,0 +1,103 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH ZCAT "1" "January 2021" "zutils 1.10" "User Commands" +.SH NAME +zcat \- decompress and concatenate files to standard output +.SH SYNOPSIS +.B zcat +[\fI\,options\/\fR] [\fI\,files\/\fR] +.SH DESCRIPTION +zcat copies each file argument to standard output in sequence. If any +file given is compressed, its decompressed content is copied. If a file +given does not exist, and its name does not end with one of the known +extensions, zcat tries the compressed file names corresponding to the +formats supported. 
If a file fails to decompress, zcat continues copying the +rest of the files. +.PP +If a file is specified as '\-', data are read from standard input, +decompressed if needed, and sent to standard output. Data read from +standard input must be of the same type; all uncompressed or all in the +same compressed format. +.PP +If no files are specified, recursive searches examine the current +working directory, and nonrecursive searches read standard input. +.PP +The formats supported are bzip2, gzip, lzip, and xz. +.PP +Exit status is 0 if no errors occurred, 1 otherwise. +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-A\fR, \fB\-\-show\-all\fR +equivalent to '\-vET' +.TP +\fB\-b\fR, \fB\-\-number\-nonblank\fR +number nonblank output lines +.TP +\fB\-e\fR +equivalent to '\-vE' +.TP +\fB\-E\fR, \fB\-\-show\-ends\fR +display '$' at end of each line +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-n\fR, \fB\-\-number\fR +number all output lines +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-O\fR, \fB\-\-force\-format=\fR<fmt> +force the format given (bz2, gz, lz, xz) +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-r\fR, \fB\-\-recursive\fR +operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +recursively follow symbolic links +.TP +\fB\-s\fR, \fB\-\-squeeze\-blank\fR +never more than one single blank line +.TP +\fB\-t\fR +equivalent to '\-vT' +.TP +\fB\-T\fR, \fB\-\-show\-tabs\fR +display TAB characters as '^I' +.TP +\fB\-v\fR, \fB\-\-show\-nonprinting\fR +use '^' and 'M\-' notation, except for LF and TAB +.TP +\fB\-\-verbose\fR +verbose mode (show error messages) +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format
+.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2021 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/doc/zcmp.1 b/doc/zcmp.1 new file mode 100644 index 0000000..5ae5231 --- /dev/null +++ b/doc/zcmp.1 @@ -0,0 +1,90 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH ZCMP "1" "January 2021" "zutils 1.10" "User Commands" +.SH NAME +zcmp \- decompress and compare two files byte by byte +.SH SYNOPSIS +.B zcmp +[\fI\,options\/\fR] \fI\,file1 \/\fR[\fI\,file2\/\fR] +.SH DESCRIPTION +zcmp compares two files and, if they differ, writes to standard output the +first byte and line number where they differ. Bytes and lines are numbered +starting with 1. A hyphen '\-' used as a file argument means standard input. +If any file given is compressed, its decompressed content is used. Compressed +files are decompressed on the fly; no temporary files are created. +.PP +The formats supported are bzip2, gzip, lzip, and xz. +.PP +zcmp compares file1 to file2. The standard input is used only if file1 or +file2 refers to standard input. If file2 is omitted zcmp tries the +following: +.IP +\- If file1 is compressed, compares its decompressed contents with +the corresponding uncompressed file (the name of file1 with the +extension removed). +.IP +\- If file1 is uncompressed, compares it with the decompressed +contents of file1.[lz|bz2|gz|xz] (the first one that is found). +.PP +Exit status is 0 if inputs are identical, 1 if different, 2 if trouble.
+.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-b\fR, \fB\-\-print\-bytes\fR +print differing bytes +.TP +\fB\-i\fR, \fB\-\-ignore\-initial=\fR<n>[:<n2>] +ignore differences in the first <n> bytes +.TP +\fB\-l\fR, \fB\-\-list\fR +list position, value of all differing bytes +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-n\fR, \fB\-\-bytes=\fR<n> +compare at most <n> bytes +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-O\fR, \fB\-\-force\-format\fR=\fI\,[\/\fR<f1>][,<f2>] +force the formats given (bz2, gz, lz, xz) +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-s\fR, \fB\-\-silent\fR +(same as \fB\-\-quiet\fR) +.TP +\fB\-v\fR, \fB\-\-verbose\fR +verbose mode (same as \fB\-\-list\fR) +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.PP +Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, +Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2021 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/doc/zdiff.1 b/doc/zdiff.1 new file mode 100644 index 0000000..65a34b7 --- /dev/null +++ b/doc/zdiff.1 @@ -0,0 +1,121 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
+.TH ZDIFF "1" "January 2021" "zutils 1.10" "User Commands" +.SH NAME +zdiff \- decompress and compare two files line by line +.SH SYNOPSIS +.B zdiff +[\fI\,options\/\fR] \fI\,file1 \/\fR[\fI\,file2\/\fR] +.SH DESCRIPTION +zdiff compares two files and, if they differ, writes to standard output the +differences line by line. A hyphen '\-' used as a file argument means standard +input. If any file given is compressed, its decompressed content is used. +zdiff is a front end to the program diff and has the limitation that messages +from diff refer to temporary file names instead of those specified. +.PP +The formats supported are bzip2, gzip, lzip, and xz. +.PP +zdiff compares file1 to file2. The standard input is used only if file1 or +file2 refers to standard input. If file2 is omitted zdiff tries the +following: +.IP +\- If file1 is compressed, compares its decompressed contents with +the corresponding uncompressed file (the name of file1 with the +extension removed). +.IP +\- If file1 is uncompressed, compares it with the decompressed +contents of file1.[lz|bz2|gz|xz] (the first one that is found). +.PP +Exit status is 0 if inputs are identical, 1 if different, 2 if trouble. +Some options only work if the diff program used supports them. 
+.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-a\fR, \fB\-\-text\fR +treat all files as text +.TP +\fB\-b\fR, \fB\-\-ignore\-space\-change\fR +ignore changes in the amount of white space +.TP +\fB\-B\fR, \fB\-\-ignore\-blank\-lines\fR +ignore changes whose lines are all blank +.TP +\fB\-c\fR +use the context output format +.TP +\fB\-C\fR, \fB\-\-context=\fR<n> +same as \fB\-c\fR but use <n> lines of context +.TP +\fB\-d\fR, \fB\-\-minimal\fR +try hard to find a smaller set of changes +.TP +\fB\-E\fR, \fB\-\-ignore\-tab\-expansion\fR +ignore changes due to tab expansion +.TP +\fB\-i\fR, \fB\-\-ignore\-case\fR +ignore case differences in file contents +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-O\fR, \fB\-\-force\-format\fR=\fI\,[<f1>\/\fR][,<f2>] +force the formats given (bz2, gz, lz, xz) +.TP +\fB\-p\fR, \fB\-\-show\-c\-function\fR +show which C function each change is in +.TP +\fB\-q\fR, \fB\-\-brief\fR +output only whether files differ +.TP +\fB\-s\fR, \fB\-\-report\-identical\-files\fR +report when two files are identical +.TP +\fB\-t\fR, \fB\-\-expand\-tabs\fR +expand tabs to spaces in output +.TP +\fB\-T\fR, \fB\-\-initial\-tab\fR +make tabs line up by prepending a tab +.TP +\fB\-u\fR +use the unified output format +.TP +\fB\-U\fR, \fB\-\-unified=\fR<n> +same as \fB\-u\fR but use <n> lines of context +.TP +\fB\-w\fR, \fB\-\-ignore\-all\-space\fR +ignore all white space +.TP +\fB\-W\fR, \fB\-\-width=\fR<n> +output at most <n> print columns +.TP +\fB\-y\fR, \fB\-\-side\-by\-side\fR +output in two columns +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.SH "REPORTING BUGS" 
+Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2021 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/doc/zgrep.1 b/doc/zgrep.1 new file mode 100644 index 0000000..69ed0cd --- /dev/null +++ b/doc/zgrep.1 @@ -0,0 +1,153 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH ZGREP "1" "January 2021" "zutils 1.10" "User Commands" +.SH NAME +zgrep \- search compressed files for a regular expression +.SH SYNOPSIS +.B zgrep +[\fI\,options\/\fR] \fI\,<pattern> \/\fR[\fI\,files\/\fR] +.SH DESCRIPTION +zgrep is a front end to the program grep that allows transparent search +on any combination of compressed and uncompressed files. If any file +given is compressed, its decompressed content is used. If a file given +does not exist, and its name does not end with one of the known +extensions, zgrep tries the compressed file names corresponding to the +formats supported. If a file fails to decompress, zgrep continues +searching the rest of the files. +.PP +If a file is specified as '\-', data are read from standard input, +decompressed if needed, and fed to grep. Data read from standard input +must be of the same type; all uncompressed or all in the same +compressed format. +.PP +If no files are specified, recursive searches examine the current +working directory, and nonrecursive searches read standard input. +.PP +The formats supported are bzip2, gzip, lzip, and xz. +.PP +Exit status is 0 if match, 1 if no match, 2 if trouble. +Some options only work if the grep program used supports them. 
+.SH OPTIONS +.TP +\fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-a\fR, \fB\-\-text\fR +treat all files as text +.TP +\fB\-A\fR, \fB\-\-after\-context=\fR<n> +print <n> lines of trailing context +.TP +\fB\-b\fR, \fB\-\-byte\-offset\fR +print the byte offset of each line +.TP +\fB\-B\fR, \fB\-\-before\-context=\fR<n> +print <n> lines of leading context +.TP +\fB\-c\fR, \fB\-\-count\fR +only print a count of matching lines per file +.TP +\fB\-C\fR, \fB\-\-context=\fR<n> +print <n> lines of output context +.TP +\fB\-\-color[=\fR<when>] +show matched strings in color +.TP +\fB\-e\fR, \fB\-\-regexp=\fR<pattern> +use <pattern> as the pattern to match +.TP +\fB\-E\fR, \fB\-\-extended\-regexp\fR +<pattern> is an extended regular expression +.TP +\fB\-f\fR, \fB\-\-file=\fR<file> +obtain patterns from <file> +.TP +\fB\-F\fR, \fB\-\-fixed\-strings\fR +<pattern> is a set of newline\-separated strings +.TP +\fB\-h\fR, \fB\-\-no\-filename\fR +suppress the prefixing filename on output +.TP +\fB\-H\fR, \fB\-\-with\-filename\fR +print the filename for each match +.TP +\fB\-i\fR, \fB\-\-ignore\-case\fR +ignore case distinctions +.TP +\fB\-I\fR +ignore binary files +.TP +\fB\-l\fR, \fB\-\-files\-with\-matches\fR +only print names of files containing matches +.TP +\fB\-L\fR, \fB\-\-files\-without\-match\fR +only print names of files containing no matches +.TP +\fB\-m\fR, \fB\-\-max\-count=\fR<n> +stop after <n> matches +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-n\fR, \fB\-\-line\-number\fR +print the line number of each line +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-o\fR, \fB\-\-only\-matching\fR +show only the part of a line matching <pattern> +.TP +\fB\-O\fR, \fB\-\-force\-format=\fR<fmt> +force the format given (bz2, gz, lz, xz) +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-r\fR, \fB\-\-recursive\fR +operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +recursively follow 
symbolic links +.TP +\fB\-s\fR, \fB\-\-no\-messages\fR +suppress error messages +.TP +\fB\-v\fR, \fB\-\-invert\-match\fR +select non\-matching lines +.TP +\fB\-\-verbose\fR +verbose mode (show error messages) +.TP +\fB\-w\fR, \fB\-\-word\-regexp\fR +match only whole words +.TP +\fB\-x\fR, \fB\-\-line\-regexp\fR +match only whole lines +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.PP +Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, +Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2021 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/doc/ztest.1 b/doc/ztest.1 new file mode 100644 index 0000000..45cda27 --- /dev/null +++ b/doc/ztest.1 @@ -0,0 +1,82 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH ZTEST "1" "January 2021" "zutils 1.10" "User Commands" +.SH NAME +ztest \- verify the integrity of compressed files +.SH SYNOPSIS +.B ztest +[\fI\,options\/\fR] [\fI\,files\/\fR] +.SH DESCRIPTION +ztest verifies the integrity of the compressed files specified. +Uncompressed files are ignored. If a file is specified as '\-', the +integrity of compressed data read from standard input is verified. Data +read from standard input must be all in the same compressed format. If +a file fails to decompress, does not exist, can't be opened, or is a +terminal, ztest continues verifying the rest of the files. 
A final +diagnostic is shown at verbosity level 1 or higher if any file fails the +test when testing multiple files. +.PP +If no files are specified, recursive searches examine the current +working directory, and nonrecursive searches read standard input. +.PP +The formats supported are bzip2, gzip, lzip, and xz. +.PP +Note that error detection in the xz format is broken. First, some xz +files lack integrity information. Second, not all xz decompressors can +verify the integrity of all xz files. Third, section 2.1.1.2 'Stream +Flags' of the xz format specification allows xz decompressors to produce +garbage output without issuing any warning. Therefore, xz files can't +always be verified as reliably as files in the other formats can. +.PP +Exit status is 0 if all compressed files verify OK, 1 if environmental +problems (file not found, invalid flags, I/O errors, etc), 2 if any +compressed file is corrupt or invalid. +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-O\fR, \fB\-\-force\-format=\fR<fmt> +force the format given (bz2, gz, lz, xz) +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-r\fR, \fB\-\-recursive\fR +operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +recursively follow symbolic links +.TP +\fB\-v\fR, \fB\-\-verbose\fR +be verbose (a 2nd \fB\-v\fR gives more) +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright 
\(co 2021 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/doc/zupdate.1 b/doc/zupdate.1 new file mode 100644 index 0000000..dcd3d24 --- /dev/null +++ b/doc/zupdate.1 @@ -0,0 +1,91 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH ZUPDATE "1" "January 2021" "zutils 1.10" "User Commands" +.SH NAME +zupdate \- recompress bzip2, gzip, xz files to lzip format +.SH SYNOPSIS +.B zupdate +[\fI\,options\/\fR] [\fI\,files\/\fR] +.SH DESCRIPTION +zupdate recompresses files from bzip2, gzip, and xz formats to lzip +format. Each original is compared with the new file and then deleted. +Only regular files with standard file name extensions are recompressed, +other files are ignored. Compressed files are decompressed and then +recompressed on the fly; no temporary files are created. The lzip format +is chosen as destination because it is the most appropriate for +long\-term data archiving. +.PP +If no files are specified, recursive searches examine the current +working directory, and nonrecursive searches do nothing. +.PP +If the lzip compressed version of a file already exists, the file is +skipped unless the option '\-\-force' is given. In this case, if the +comparison with the existing lzip version fails, an error is returned +and the original file is not deleted. The operation of zupdate is meant +to be safe and not cause any data loss. Therefore, existing lzip +compressed files are never overwritten nor deleted. +.PP +The names of the original files must have one of the following extensions: +\&'.bz2', '.gz', or '.xz', which are recompressed to '.lz'; +\&'.tbz', '.tbz2', '.tgz', or '.txz', which are recompressed to '.tlz'. +.PP +Exit status is 0 if all the compressed files were successfully recompressed +(if needed), compared, and deleted (if requested). Non\-zero otherwise. 
+.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-f\fR, \fB\-\-force\fR +don't skip a file even if the .lz exists +.TP +\fB\-k\fR, \fB\-\-keep\fR +keep (don't delete) input files +.TP +\fB\-l\fR, \fB\-\-lzip\-verbose\fR +pass one option \fB\-v\fR to the lzip compressor +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-r\fR, \fB\-\-recursive\fR +operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +recursively follow symbolic links +.TP +\fB\-v\fR, \fB\-\-verbose\fR +be verbose (a 2nd \fB\-v\fR gives more) +.TP +\fB\-0\fR .. \fB\-9\fR +set compression level [default 9] +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2021 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/doc/zutils.info b/doc/zutils.info new file mode 100644 index 0000000..854100f --- /dev/null +++ b/doc/zutils.info @@ -0,0 +1,838 @@ +This is zutils.info, produced by makeinfo version 4.13+ from zutils.texi. + +INFO-DIR-SECTION Data Compression +START-INFO-DIR-ENTRY +* Zutils: (zutils). Utilities dealing with compressed files +END-INFO-DIR-ENTRY + + +File: zutils.info, Node: Top, Next: Introduction, Up: (dir) + +Zutils Manual +************* + +This manual is for Zutils (version 1.10, 5 January 2021). 
+ +* Menu: + +* Introduction:: Purpose and features of zutils +* Common options:: Options common to all utilities +* The zutilsrc file:: The zutils configuration file +* Zcat:: Concatenating compressed files +* Zcmp:: Comparing compressed files byte by byte +* Zdiff:: Comparing compressed files line by line +* Zgrep:: Searching inside compressed files +* Ztest:: Testing the integrity of compressed files +* Zupdate:: Recompressing files to lzip format +* Problems:: Reporting bugs +* Concept index:: Index of concepts + + + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This manual is free documentation: you have unlimited permission to copy, +distribute, and modify it. + + +File: zutils.info, Node: Introduction, Next: Common options, Prev: Top, Up: Top + +1 Introduction +************** + +Zutils is a collection of utilities able to process any combination of +compressed and uncompressed files transparently. If any file given, +including standard input, is compressed, its decompressed content is used. +Compressed files are decompressed on the fly; no temporary files are +created. + + These utilities are not wrapper scripts but safer and more efficient C++ +programs. In particular the option '--recursive' is very efficient in those +utilities supporting it. + +The utilities provided are zcat, zcmp, zdiff, zgrep, ztest, and zupdate. +The formats supported are bzip2, gzip, lzip, and xz. +Zutils uses external compressors. The compressor to be used for each format +is configurable at runtime. + + zcat, zcmp, zdiff, and zgrep are improved replacements for the shell +scripts provided by GNU gzip. ztest is unique to zutils. zupdate is similar +to gzip's znew. + + NOTE: Bzip2 and lzip provide well-defined values of exit status, which +makes them safe to use with zutils. Gzip and xz may return ambiguous warning +values, making them less reliable back ends for zutils. *Note +compressor-requirements::. 
+ + FORMAT NOTE 1: The option '--format' allows the processing of a subset +of formats in recursive mode and when trying compressed file names: +'zgrep foo -r --format=bz2,lz somedir somefile.tar'. + + FORMAT NOTE 2: If the option '--force-format' is given, the files are +passed to the corresponding decompressor without verifying their format, +allowing for example the processing of compress'd (.Z) files with gzip: +'zcmp --force-format=gz file.Z file.lz'. + + LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never +have been compressed. Decompressed is used to refer to data which have +undergone the process of decompression. + + + Numbers given as arguments to options (positions, sizes) may be followed +by a multiplier and an optional 'B' for "byte". + + Table of SI and binary prefixes (unit multipliers): + +Prefix Value | Prefix Value +k kilobyte (10^3 = 1000) | Ki kibibyte (2^10 = 1024) +M megabyte (10^6) | Mi mebibyte (2^20) +G gigabyte (10^9) | Gi gibibyte (2^30) +T terabyte (10^12) | Ti tebibyte (2^40) +P petabyte (10^15) | Pi pebibyte (2^50) +E exabyte (10^18) | Ei exbibyte (2^60) +Z zettabyte (10^21) | Zi zebibyte (2^70) +Y yottabyte (10^24) | Yi yobibyte (2^80) + + +File: zutils.info, Node: Common options, Next: The zutilsrc file, Prev: Introduction, Up: Top + +2 Common options +**************** + +The following options are available in all the utilities. Rather than +writing identical descriptions for each of the programs, they are described +here. *Note Argument syntax: (arg_parser)Argument syntax. + +'-h' +'--help' + Print an informative help message describing the options and exit. + zgrep only supports the '--help' form of this option. + +'-V' +'--version' + Print the version number on the standard output and exit. This version + number should be included in all bug reports. + +'-M FORMAT_LIST' +'--format=FORMAT_LIST' + Process only the formats listed in the comma-separated FORMAT_LIST. 
+ Valid formats are 'bz2', 'gz', 'lz', 'xz', and 'un' for + 'uncompressed', meaning "any file name without a known extension". + This option excludes files based on extension, instead of format, + because it is more efficient. The exclusion only applies to names + generated automatically (for example when adding extensions to a file + name or when operating recursively on directories). Files given in the + command line are always processed. + + Each format in FORMAT_LIST enables file names with the following + extensions: + + bz2 enables .bz2 .tbz .tbz2 + gz enables .gz .tgz + lz enables .lz .tlz + xz enables .xz .txz + un enables any other file name + +'-N' +'--no-rcfile' + Don't read the runtime configuration file 'zutilsrc'. + +'--bz2=COMMAND' +'--gz=COMMAND' +'--lz=COMMAND' +'--xz=COMMAND' + Set program to be used as (de)compressor for the corresponding format. + COMMAND may include arguments. For example '--lz='plzip --threads=2''. + The program set with '--lz' is used for both compression and + decompression. The other three are used only for decompression. The + name of the program can't begin with '-'. These options override the + values set in 'zutilsrc'. The compression program used must meet three + requirements: + + 1. When called with the option '-d', it must read compressed data + from the standard input and produce decompressed data on the + standard output. + + 2. If the option '-q' is passed to zutils, the compression program + must also accept it. + + 3. It must return 0 if no errors occurred, and a non-zero value + otherwise. + + + +File: zutils.info, Node: The zutilsrc file, Next: Zcat, Prev: Common options, Up: Top + +3 The zutils configuration file 'zutilsrc' +****************************************** + +'zutilsrc' is the runtime configuration file for zutils. In it you may +define the compressor name and options to be used for each format. +'zutilsrc' is optional; you don't need to install it in order to run zutils. 
+ + The compressors specified in the command line override those specified +in 'zutilsrc'. + + You may copy the system 'zutilsrc' file '${sysconfdir}/zutilsrc' to +'$HOME/.zutilsrc' and customize these options as you like. The file syntax +is fairly obvious (and there are further instructions in it): + + 1. Any line beginning with '#' is a comment line. + + 2. Each non-comment line defines the command to be used for the + corresponding format, with the syntax: + <format> = <compressor> [options] + where <format> is one of 'bz2', 'gz', 'lz', or 'xz'. + + +File: zutils.info, Node: Zcat, Next: Zcmp, Prev: The zutilsrc file, Up: Top + +4 Zcat +****** + +zcat copies each FILE argument to standard output in sequence. If any file +given is compressed, its decompressed content is copied. If a file given +does not exist, and its name does not end with one of the known extensions, +zcat tries the compressed file names corresponding to the formats +supported. If a file fails to decompress, zcat continues copying the rest +of the files. + + If a file is specified as '-', data are read from standard input, +decompressed if needed, and sent to standard output. Data read from +standard input must be of the same type; all uncompressed or all in the +same compressed format. + + If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + + The format for running zcat is: + + zcat [OPTIONS] [FILES] + +Exit status is 0 if no errors occurred, 1 otherwise. + + zcat supports the following options: + +'-A' +'--show-all' + Equivalent to '-vET'. + +'-b' +'--number-nonblank' + Number all nonblank output lines, starting with 1. The line count is + unlimited. + +'-e' + Equivalent to '-vE'. + +'-E' +'--show-ends' + Print a '$' after the end of each line. + +'-n' +'--number' + Number all output lines, starting with 1. The line count is unlimited. + +'-O FORMAT' +'--force-format=FORMAT' + Force the compressed format given. 
Valid values for FORMAT are 'bz2', + 'gz', 'lz', and 'xz'. If this option is used, the files are passed to + the corresponding decompressor without verifying their format, and the + exact file name must be given. Other names won't be tried. + +'-q' +'--quiet' + Quiet operation. Suppress all messages. + +'-r' +'--recursive' + For each directory operand, read and process all files in that + directory, recursively. Follow symbolic links given in the command + line, but skip symbolic links that are encountered recursively. + +'-R' +'--dereference-recursive' + For each directory operand, read and process all files in that + directory, recursively, following all symbolic links. + +'-s' +'--squeeze-blank' + Replace multiple adjacent blank lines with a single blank line. + +'-t' + Equivalent to '-vT'. + +'-T' +'--show-tabs' + Print TAB characters as '^I'. + +'-v' +'--show-nonprinting' + Print control characters except for LF (newline) and TAB using '^' + notation and precede characters larger than 127 with 'M-' (which + stands for "meta"). + +'--verbose' + Verbose mode. Show error messages. + + + +File: zutils.info, Node: Zcmp, Next: Zdiff, Prev: Zcat, Up: Top + +5 Zcmp +****** + +zcmp compares two files and, if they differ, writes to standard output the +first byte and line number where they differ. Bytes and lines are numbered +starting with 1. A hyphen '-' used as a FILE argument means standard input. +If any file given is compressed, its decompressed content is used. +Compressed files are decompressed on the fly; no temporary files are +created. + + The format for running zcmp is: + + zcmp [OPTIONS] FILE1 [FILE2] + +This compares FILE1 to FILE2. The standard input is used only if FILE1 or +FILE2 refers to standard input. If FILE2 is omitted zcmp tries the +following: + + - If FILE1 is compressed, compares its decompressed contents with the + corresponding uncompressed file (the name of FILE1 with the extension + removed). 
+ + - If FILE1 is uncompressed, compares it with the decompressed contents + of FILE1.[lz|bz2|gz|xz] (the first one that is found). + +An exit status of 0 means no differences were found, 1 means some +differences were found, and 2 means trouble. + + zcmp supports the following options: + +'-b' +'--print-bytes' + Print the differing bytes. Print control bytes as a '^' followed by a + letter, and precede bytes larger than 127 with 'M-' (which stands for + "meta"). + +'-i SIZE' +'--ignore-initial=SIZE' + Ignore any differences in the first SIZE bytes of the input files. + Treat files with fewer than SIZE bytes as if they were empty. If SIZE + is in the form 'SIZE1:SIZE2', ignore the first SIZE1 bytes of the + first input file and the first SIZE2 bytes of the second input file. + +'-l' +'-v' +'--list' +'--verbose' + Print the byte numbers (in decimal) and values (in octal) of all + differing bytes. + +'-n COUNT' +'--bytes=COUNT' + Compare at most COUNT input bytes. + +'-O [FORMAT1][,FORMAT2]' +'--force-format=[FORMAT1][,FORMAT2]' + Force the compressed formats given. Any of FORMAT1 or FORMAT2 may be + omitted and the corresponding format will be automatically detected. + Valid values for FORMAT are 'bz2', 'gz', 'lz', and 'xz'. If at least + one format is specified with this option, the file is passed to the + corresponding decompressor without verifying its format, and the exact + file names of both FILE1 and FILE2 must be given. Other names won't be + tried. + +'-q' +'-s' +'--quiet' +'--silent' + Don't print anything; only return an exit status indicating whether the + files differ. + + + +File: zutils.info, Node: Zdiff, Next: Zgrep, Prev: Zcmp, Up: Top + +6 Zdiff +******* + +zdiff compares two files and, if they differ, writes to standard output the +differences line by line. A hyphen '-' used as a FILE argument means +standard input. If any file given is compressed, its decompressed content +is used. 
zdiff is a front end to the program diff and has the limitation +that messages from diff refer to temporary file names instead of those +specified. + + The format for running zdiff is: + + zdiff [OPTIONS] FILE1 [FILE2] + +This compares FILE1 to FILE2. The standard input is used only if FILE1 or +FILE2 refers to standard input. If FILE2 is omitted zdiff tries the +following: + + - If FILE1 is compressed, compares its decompressed contents with the + corresponding uncompressed file (the name of FILE1 with the extension + removed). + + - If FILE1 is uncompressed, compares it with the decompressed contents + of FILE1.[lz|bz2|gz|xz] (the first one that is found). + +An exit status of 0 means no differences were found, 1 means some +differences were found, and 2 means trouble. + + zdiff supports the following options (some options only work if the diff +program used supports them): + +'-a' +'--text' + Treat all files as text. + +'-b' +'--ignore-space-change' + Ignore changes in the amount of white space. + +'-B' +'--ignore-blank-lines' + Ignore changes whose lines are all blank. + +'-c' + Use the context output format. + +'-C N' +'--context=N' + Same as -c but use N lines of context. + +'-d' +'--minimal' + Try hard to find a smaller set of changes. + +'-E' +'--ignore-tab-expansion' + Ignore changes due to tab expansion. + +'-i' +'--ignore-case' + Ignore case differences in file contents. + +'-O [FORMAT1][,FORMAT2]' +'--force-format=[FORMAT1][,FORMAT2]' + Force the compressed formats given. Any of FORMAT1 or FORMAT2 may be + omitted and the corresponding format will be automatically detected. + Valid values for FORMAT are 'bz2', 'gz', 'lz', and 'xz'. If at least + one format is specified with this option, the file is passed to the + corresponding decompressor without verifying its format, and the exact + file names of both FILE1 and FILE2 must be given. Other names won't be + tried. + +'-p' +'--show-c-function' + Show which C function each change is in. 
+ +'-q' +'--brief' + Output only whether files differ. + +'-s' +'--report-identical-files' + Report when two files are identical. + +'-t' +'--expand-tabs' + Expand tabs to spaces in output. + +'-T' +'--initial-tab' + Make tabs line up by prepending a tab. + +'-u' + Use the unified output format. + +'-U N' +'--unified=N' + Same as -u but use N lines of context. + +'-w' +'--ignore-all-space' + Ignore all white space. + + + +File: zutils.info, Node: Zgrep, Next: Ztest, Prev: Zdiff, Up: Top + +7 Zgrep +******* + +zgrep is a front end to the program grep that allows transparent search on +any combination of compressed and uncompressed files. If any file given is +compressed, its decompressed content is used. If a file given does not +exist, and its name does not end with one of the known extensions, zgrep +tries the compressed file names corresponding to the formats supported. If +a file fails to decompress, zgrep continues searching the rest of the files. + + If a file is specified as '-', data are read from standard input, +decompressed if needed, and fed to grep. Data read from standard input must +be of the same type; all uncompressed or all in the same compressed format. + + If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + + The format for running zgrep is: + + zgrep [OPTIONS] PATTERN [FILES] + +An exit status of 0 means at least one match was found, 1 means no matches +were found, and 2 means trouble. + + zgrep supports the following options (some options only work if the grep +program used supports them): + +'-a' +'--text' + Treat all files as text. + +'-A N' +'--after-context=N' + Print N lines of trailing context. + +'-b' +'--byte-offset' + Print the byte offset of each line. + +'-B N' +'--before-context=N' + Print N lines of leading context. + +'-c' +'--count' + Only print a count of matching lines per file. + +'-C N' +'--context=N' + Print N lines of output context. 
+ +'--color[=WHEN]' + Show matched strings in color. WHEN is 'never', 'always', or 'auto'. + +'-e PATTERN' +'--regexp=PATTERN' + Use PATTERN as the pattern to match. + +'-E' +'--extended-regexp' + Treat PATTERN as an extended regular expression. + +'-f FILE' +'--file=FILE' + Obtain patterns from FILE, one per line. + When searching in several files at once, command substitution can be + used with '-e' to read FILE only once, for example if FILE is not a + regular file: 'zgrep -e "$(cat FILE)" file1.lz file2.gz' + +'-F' +'--fixed-strings' + Treat PATTERN as a set of newline-separated strings. + +'-h' +'--no-filename' + Suppress the prefixing of file names on output when multiple files are + searched. + +'-H' +'--with-filename' + Print the file name for each match. + +'-i' +'--ignore-case' + Ignore case distinctions. + +'-I' + Ignore binary files. + +'-l' +'--files-with-matches' + Only print names of files containing at least one match. + +'-L' +'--files-without-match' + Only print names of files not containing any matches. + Note: option -L fails (prints wrong results, returns wrong status, and + even hangs) when using GNU grep versions 3.2 to 3.4 inclusive because + of a wrong change in the exit status of grep, which was reverted in + GNU grep 3.5. + +'-m N' +'--max-count=N' + Stop after N matches. + +'-n' +'--line-number' + Prefix each matched line with its line number in the input file. + +'-o' +'--only-matching' + Show only the part of matching lines that actually matches PATTERN. + +'-O FORMAT' +'--force-format=FORMAT' + Force the compressed format given. Valid values for FORMAT are 'bz2', + 'gz', 'lz', and 'xz'. If this option is used, the files are passed to + the corresponding decompressor without verifying their format, and the + exact file name must be given. Other names won't be tried. + +'-q' +'--quiet' + Suppress all messages. Exit immediately with zero status if any match + is found, even if an error was detected. 
+ +'-r' +'--recursive' + For each directory operand, read and process all files in that + directory, recursively. Follow symbolic links given in the command + line, but skip symbolic links that are encountered recursively. + +'-R' +'--dereference-recursive' + For each directory operand, read and process all files in that + directory, recursively, following all symbolic links. + +'-s' +'--no-messages' + Suppress error messages about nonexistent or unreadable files. + +'-v' +'--invert-match' + Select non-matching lines. + +'--verbose' + Verbose mode. Show error messages. + +'-w' +'--word-regexp' + Match only whole words. + +'-x' +'--line-regexp' + Match only whole lines. + + + +File: zutils.info, Node: Ztest, Next: Zupdate, Prev: Zgrep, Up: Top + +8 Ztest +******* + +ztest verifies the integrity of the compressed files specified. +Uncompressed files are ignored. If a file is specified as '-', the +integrity of compressed data read from standard input is verified. Data +read from standard input must be all in the same compressed format. If a +file fails to decompress, does not exist, can't be opened, or is a +terminal, ztest continues verifying the rest of the files. A final +diagnostic is shown at verbosity level 1 or higher if any file fails the +test when testing multiple files. + + If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + + Note that error detection in the xz format is broken. First, some xz +files lack integrity information. Second, not all xz decompressors can +verify the integrity of all xz files. Third, section 2.1.1.2 'Stream Flags' +of the xz format specification allows xz decompressors to produce garbage +output without issuing any warning. Therefore, xz files can't always be +verified as reliably as files in the other formats can. 
+ + The format for running ztest is: + + ztest [OPTIONS] [FILES] + +The exit status is 0 if all compressed files verify OK, 1 if environmental +problems (file not found, invalid flags, I/O errors, etc), 2 if any +compressed file is corrupt or invalid. + + ztest supports the following options: + +'-O FORMAT' +'--force-format=FORMAT' + Force the compressed format given. Valid values for FORMAT are 'bz2', + 'gz', 'lz', and 'xz'. If this option is used, the files are passed to + the corresponding decompressor without verifying their format, and any + files in a format that the decompressor can't understand will fail. + For example, '--force-format=gz' can test gzipped (.gz) and compress'd + (.Z) files if the compressor used is GNU gzip. + +'-q' +'--quiet' + Quiet operation. Suppress all messages. + +'-r' +'--recursive' + For each directory operand, read and process all files in that + directory, recursively. Follow symbolic links given in the command + line, but skip symbolic links that are encountered recursively. + +'-R' +'--dereference-recursive' + For each directory operand, read and process all files in that + directory, recursively, following all symbolic links. + +'-v' +'--verbose' + Verbose mode. Show the verify status for each file processed. + Further -v's increase the verbosity level. + + + +File: zutils.info, Node: Zupdate, Next: Problems, Prev: Ztest, Up: Top + +9 Zupdate +********* + +zupdate recompresses files from bzip2, gzip, and xz formats to lzip format. +Each original is compared with the new file and then deleted. Only regular +files with standard file name extensions are recompressed, other files are +ignored. Compressed files are decompressed and then recompressed on the fly; +no temporary files are created. If an error happens while recompressing a +file, zupdate exits immediately without recompressing the rest of the files. +The lzip format is chosen as destination because it is the most appropriate +for long-term data archiving. 
+ + If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches do nothing. + + If the lzip compressed version of a file already exists, the file is +skipped unless the option '--force' is given. In this case, if the +comparison with the existing lzip version fails, an error is returned and +the original file is not deleted. The operation of zupdate is meant to be +safe and not cause any data loss. Therefore, existing lzip compressed files +are never overwritten nor deleted. + + Combining the options '--force' and '--keep', as in +'zupdate -f -k *.gz', verifies that there are no differences between each +pair of files in a multiformat set of files. + + The names of the original files must have one of the following +extensions: +'.bz2', '.gz', or '.xz', which are recompressed to '.lz'; +'.tbz', '.tbz2', '.tgz', or '.txz', which are recompressed to '.tlz'. +Keeping the combined extensions ('.tgz' -> '.tlz') may be useful when +recompressing Slackware packages, for example. + + Recompressing a file is much like copying or moving it; therefore zupdate +preserves the access and modification dates, permissions, and, when +possible, ownership of the file just as 'cp -p' does. (If the user ID or +the group ID can't be duplicated, the file permission bits S_ISUID and +S_ISGID are cleared). + + The format for running zupdate is: + + zupdate [OPTIONS] [FILES] + +Exit status is 0 if all the compressed files were successfully recompressed +(if needed), compared, and deleted (if requested). Non-zero otherwise. + + zupdate supports the following options: + +'-f' +'--force' + Don't skip a file for which a lzip compressed version already exists. + '--force' compares the content of the input file with the content of + the existing lzip file and deletes the input file if both contents are + identical. + +'-k' +'--keep' + Keep (don't delete) the input file after comparing it with the lzip + file. 
+ +'-l' +'--lzip-verbose' + Pass one option '-v' to the lzip compressor so that it shows the + compression ratio for each file processed. Using lzip 1.15 or newer, a + second '-l' shows the progress of compression. Use it together with + '-v' to see the name of the file. + +'-q' +'--quiet' + Quiet operation. Suppress all messages. + +'-r' +'--recursive' + For each directory operand, read and process all files in that + directory, recursively. Follow symbolic links given in the command + line, but skip symbolic links that are encountered recursively. + +'-R' +'--dereference-recursive' + For each directory operand, read and process all files in that + directory, recursively, following all symbolic links. + +'-v' +'--verbose' + Verbose mode. Show the files being processed. A second '-v' also shows + the files being ignored. + +'-0 .. -9' + Set the compression level of lzip. By default zupdate passes '-9' to + lzip. Custom compression options can be passed to lzip with the option + '--lz'. For example '--lz='lzip -9 -s64MiB''. + + + +File: zutils.info, Node: Problems, Next: Concept index, Prev: Zupdate, Up: Top + +10 Reporting bugs +***************** + +There are probably bugs in zutils. There are certainly errors and omissions +in this manual. If you report them, they will get fixed. If you don't, no +one will ever know about them and they will remain unfixed for all +eternity, if not longer. + + If you find a bug in zutils, please send electronic mail to +<zutils-bug@nongnu.org>. Include the version number, which you can find by +running 'zupdate --version'. + + +File: zutils.info, Node: Concept index, Prev: Problems, Up: Top + +Concept index +************* + +[index] +* Menu: + +* bugs: Problems. (line 6) +* common options: Common options. (line 6) +* getting help: Problems. (line 6) +* introduction: Introduction. (line 6) +* zcat: Zcat. (line 6) +* zcmp: Zcmp. (line 6) +* zdiff: Zdiff. (line 6) +* zgrep: Zgrep. (line 6) +* ztest: Ztest. (line 6) +* zupdate: Zupdate. 
(line 6) +* zutilsrc: The zutilsrc file. (line 6) + + + +Tag Table: +Node: Top222 +Node: Introduction1151 +Node: Common options3776 +Ref: compressor-requirements5847 +Node: The zutilsrc file6219 +Node: Zcat7180 +Node: Zcmp9743 +Node: Zdiff12233 +Node: Zgrep14973 +Node: Ztest19218 +Node: Zupdate21725 +Node: Problems25409 +Node: Concept index25943 + +End Tag Table + + +Local Variables: +coding: iso-8859-15 +End: diff --git a/doc/zutils.texi b/doc/zutils.texi new file mode 100644 index 0000000..c494185 --- /dev/null +++ b/doc/zutils.texi @@ -0,0 +1,882 @@ +\input texinfo @c -*-texinfo-*- +@c %**start of header +@setfilename zutils.info +@documentencoding ISO-8859-15 +@settitle Zutils Manual +@finalout +@c %**end of header + +@set UPDATED 5 January 2021 +@set VERSION 1.10 + +@dircategory Data Compression +@direntry +* Zutils: (zutils). Utilities dealing with compressed files +@end direntry + + +@ifnothtml +@titlepage +@title Zutils +@subtitle Utilities dealing with compressed files +@subtitle for Zutils version @value{VERSION}, @value{UPDATED} +@author by Antonio Diaz Diaz + +@page +@vskip 0pt plus 1filll +@end titlepage + +@contents +@end ifnothtml + +@ifnottex +@node Top +@top + +This manual is for Zutils (version @value{VERSION}, @value{UPDATED}). + +@menu +* Introduction:: Purpose and features of zutils +* Common options:: Options common to all utilities +* The zutilsrc file:: The zutils configuration file +* Zcat:: Concatenating compressed files +* Zcmp:: Comparing compressed files byte by byte +* Zdiff:: Comparing compressed files line by line +* Zgrep:: Searching inside compressed files +* Ztest:: Testing the integrity of compressed files +* Zupdate:: Recompressing files to lzip format +* Problems:: Reporting bugs +* Concept index:: Index of concepts +@end menu + +@sp 1 +Copyright @copyright{} 2009-2021 Antonio Diaz Diaz. + +This manual is free documentation: you have unlimited permission to copy, +distribute, and modify it. 
+@end ifnottex + + +@node Introduction +@chapter Introduction +@cindex introduction + +@uref{http://www.nongnu.org/zutils/zutils.html,,Zutils} +is a collection of utilities able to process any combination of +compressed and uncompressed files transparently. If any file given, +including standard input, is compressed, its decompressed content is used. +Compressed files are decompressed on the fly; no temporary files are +created. + +These utilities are not wrapper scripts but safer and more efficient C++ +programs. In particular the option @samp{--recursive} is very efficient in +those utilities supporting it. + +@noindent +The utilities provided are zcat, zcmp, zdiff, zgrep, ztest, and zupdate.@* +The formats supported are bzip2, gzip, lzip, and xz.@* +Zutils uses external compressors. The compressor to be used for each format +is configurable at runtime. + +zcat, zcmp, zdiff, and zgrep are improved replacements for the shell scripts +provided by GNU gzip. ztest is unique to zutils. zupdate is similar to +gzip's znew. + +NOTE: Bzip2 and lzip provide well-defined values of exit status, which makes +them safe to use with zutils. Gzip and xz may return ambiguous warning +values, making them less reliable back ends for zutils. +@xref{compressor-requirements}. + +FORMAT NOTE 1: The option @samp{--format} allows the processing of a subset +of formats in recursive mode and when trying compressed file names: +@w{@samp{zgrep foo -r --format=bz2,lz somedir somefile.tar}}. + +FORMAT NOTE 2: If the option @samp{--force-format} is given, the files are +passed to the corresponding decompressor without verifying their format, +allowing for example the processing of compress'd (.Z) files with gzip: +@w{@samp{zcmp --force-format=gz file.Z file.lz}}. + +LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have +been compressed. Decompressed is used to refer to data which have undergone +the process of decompression. 
+ +@sp 1 +Numbers given as arguments to options (positions, sizes) may be followed +by a multiplier and an optional @samp{B} for "byte". + +Table of SI and binary prefixes (unit multipliers): + +@multitable {Prefix} {kilobyte (10^3 = 1000)} {|} {Prefix} {kibibyte (2^10 = 1024)} +@item Prefix @tab Value @tab | @tab Prefix @tab Value +@item k @tab kilobyte (10^3 = 1000) @tab | @tab Ki @tab kibibyte (2^10 = 1024) +@item M @tab megabyte (10^6) @tab | @tab Mi @tab mebibyte (2^20) +@item G @tab gigabyte (10^9) @tab | @tab Gi @tab gibibyte (2^30) +@item T @tab terabyte (10^12) @tab | @tab Ti @tab tebibyte (2^40) +@item P @tab petabyte (10^15) @tab | @tab Pi @tab pebibyte (2^50) +@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60) +@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70) +@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) +@end multitable + + +@node Common options +@chapter Common options +@cindex common options + +The following +@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options} +are available in all the utilities. Rather than writing identical +descriptions for each of the programs, they are described here. +@ifnothtml +@xref{Argument syntax,,,arg_parser}. +@end ifnothtml + +@table @code +@item -h +@itemx --help +Print an informative help message describing the options and exit. zgrep +only supports the @samp{--help} form of this option. + +@item -V +@itemx --version +Print the version number on the standard output and exit. +This version number should be included in all bug reports. + +@item -M @var{format_list} +@itemx --format=@var{format_list} +Process only the formats listed in the comma-separated +@var{format_list}. Valid formats are @samp{bz2}, @samp{gz}, @samp{lz}, +@samp{xz}, and @samp{un} for @samp{uncompressed}, meaning "any file name +without a known extension". This option excludes files based on +extension, instead of format, because it is more efficient. 
The +exclusion only applies to names generated automatically (for example +when adding extensions to a file name or when operating recursively on +directories). Files given in the command line are always processed. + +Each format in @var{format_list} enables file names with the following +extensions: + +@multitable {bz2} {enables} {any other file name} +@item bz2 @tab enables @tab .bz2 .tbz .tbz2 +@item gz @tab enables @tab .gz .tgz +@item lz @tab enables @tab .lz .tlz +@item xz @tab enables @tab .xz .txz +@item un @tab enables @tab any other file name +@end multitable + +@item -N +@itemx --no-rcfile +Don't read the runtime configuration file @samp{zutilsrc}. + +@item --bz2=@var{command} +@itemx --gz=@var{command} +@itemx --lz=@var{command} +@itemx --xz=@var{command} +Set program to be used as (de)compressor for the corresponding format. +@var{command} may include arguments. For example +@w{@samp{--lz='plzip --threads=2'}}. The program set with @samp{--lz} is +used for both compression and decompression. The other three are used only +for decompression. The name of the program can't begin with @samp{-}. These +options override the values set in @file{zutilsrc}. The compression program +used must meet three requirements: + +@anchor{compressor-requirements} +@enumerate +@item +When called with the option @samp{-d}, it must read compressed data from +the standard input and produce decompressed data on the standard output. +@item +If the option @samp{-q} is passed to zutils, the compression program must +also accept it. +@item +It must return 0 if no errors occurred, and a non-zero value otherwise. +@end enumerate + +@end table + + +@node The zutilsrc file +@chapter The zutils configuration file 'zutilsrc' +@cindex zutilsrc + +@file{zutilsrc} is the runtime configuration file for zutils. In it you +may define the compressor name and options to be used for each format. +@file{zutilsrc} is optional; you don't need to install it in order to run +zutils. 
+ +The compressors specified in the command line override those specified +in @file{zutilsrc}. + +You may copy the system @file{zutilsrc} file @file{$@{sysconfdir@}/zutilsrc} +to @file{$HOME/.zutilsrc} and customize these options as you like. The file +syntax is fairly obvious (and there are further instructions in it): + +@enumerate +@item +Any line beginning with @samp{#} is a comment line. +@item +Each non-comment line defines the command to be used for the corresponding +format, with the syntax: +@example +<format> = <compressor> [options] +@end example +where <format> is one of @samp{bz2}, @samp{gz}, @samp{lz}, or @samp{xz}. +@end enumerate + + +@node Zcat +@chapter Zcat +@cindex zcat + +zcat copies each @var{file} argument to standard output in sequence. If any +file given is compressed, its decompressed content is copied. If a file +given does not exist, and its name does not end with one of the known +extensions, zcat tries the compressed file names corresponding to the +formats supported. If a file fails to decompress, zcat continues copying the +rest of the files. + +If a file is specified as @samp{-}, data are read from standard input, +decompressed if needed, and sent to standard output. Data read from +standard input must be of the same type; all uncompressed or all in the +same compressed format. + +If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + +The format for running zcat is: + +@example +zcat [@var{options}] [@var{files}] +@end example + +@noindent +Exit status is 0 if no errors occurred, 1 otherwise. + +zcat supports the following options: + +@table @code +@item -A +@itemx --show-all +Equivalent to @samp{-vET}. + +@item -b +@itemx --number-nonblank +Number all nonblank output lines, starting with 1. The line count is +unlimited. + +@item -e +Equivalent to @samp{-vE}. + +@item -E +@itemx --show-ends +Print a @samp{$} after the end of each line. 
+ +@item -n +@itemx --number +Number all output lines, starting with 1. The line count is unlimited. + +@item -O @var{format} +@itemx --force-format=@var{format} +Force the compressed format given. Valid values for @var{format} are +@samp{bz2}, @samp{gz}, @samp{lz}, and @samp{xz}. If this option is used, +the files are passed to the corresponding decompressor without verifying +their format, and the exact file name must be given. Other names won't +be tried. + +@item -q +@itemx --quiet +Quiet operation. Suppress all messages. + +@item -r +@itemx --recursive +For each directory operand, read and process all files in that directory, +recursively. Follow symbolic links given in the command line, but skip +symbolic links that are encountered recursively. + +@item -R +@itemx --dereference-recursive +For each directory operand, read and process all files in that directory, +recursively, following all symbolic links. + +@item -s +@itemx --squeeze-blank +Replace multiple adjacent blank lines with a single blank line. + +@item -t +Equivalent to @samp{-vT}. + +@item -T +@itemx --show-tabs +Print TAB characters as @samp{^I}. + +@item -v +@itemx --show-nonprinting +Print control characters except for LF (newline) and TAB using @samp{^} +notation and precede characters larger than 127 with @samp{M-} (which +stands for "meta"). + +@item --verbose +Verbose mode. Show error messages. + +@end table + + +@node Zcmp +@chapter Zcmp +@cindex zcmp + +zcmp compares two files and, if they differ, writes to standard output the +first byte and line number where they differ. Bytes and lines are numbered +starting with 1. A hyphen @samp{-} used as a @var{file} argument means +standard input. If any file given is compressed, its decompressed content is +used. Compressed files are decompressed on the fly; no temporary files are +created. 
+ +The format for running zcmp is: + +@example +zcmp [@var{options}] @var{file1} [@var{file2}] +@end example + +@noindent +This compares @var{file1} to @var{file2}. The standard input is used only if +@var{file1} or @var{file2} refers to standard input. If @var{file2} is +omitted zcmp tries the following: + +@itemize - +@item +If @var{file1} is compressed, compares its decompressed contents with +the corresponding uncompressed file (the name of @var{file1} with the +extension removed). +@item +If @var{file1} is uncompressed, compares it with the decompressed +contents of @var{file1}.[lz|bz2|gz|xz] (the first one that is found). +@end itemize + +@noindent +An exit status of 0 means no differences were found, 1 means some +differences were found, and 2 means trouble. + +zcmp supports the following options: + +@table @code +@item -b +@itemx --print-bytes +Print the differing bytes. Print control bytes as a @samp{^} followed by +a letter, and precede bytes larger than 127 with @samp{M-} (which stands +for "meta"). + +@item -i @var{size} +@itemx --ignore-initial=@var{size} +Ignore any differences in the first @var{size} bytes of the input files. +Treat files with fewer than @var{size} bytes as if they were empty. If +@var{size} is in the form @samp{@var{size1}:@var{size2}}, ignore the +first @var{size1} bytes of the first input file and the first +@var{size2} bytes of the second input file. + +@item -l +@itemx -v +@itemx --list +@itemx --verbose +Print the byte numbers (in decimal) and values (in octal) of all +differing bytes. + +@item -n @var{count} +@itemx --bytes=@var{count} +Compare at most @var{count} input bytes. + +@item -O [@var{format1}][,@var{format2}] +@itemx --force-format=[@var{format1}][,@var{format2}] +Force the compressed formats given. Any of @var{format1} or +@var{format2} may be omitted and the corresponding format will be +automatically detected. Valid values for @var{format} are @samp{bz2}, +@samp{gz}, @samp{lz}, and @samp{xz}. 
If at least one format is specified +with this option, the file is passed to the corresponding decompressor +without verifying its format, and the exact file names of both +@var{file1} and @var{file2} must be given. Other names won't be tried. + +@item -q +@itemx -s +@itemx --quiet +@itemx --silent +Don't print anything; only return an exit status indicating whether the +files differ. + +@end table + + +@node Zdiff +@chapter Zdiff +@cindex zdiff + +zdiff compares two files and, if they differ, writes to standard output the +differences line by line. A hyphen @samp{-} used as a @var{file} argument +means standard input. If any file given is compressed, its decompressed +content is used. zdiff is a front end to the program diff and has the +limitation that messages from diff refer to temporary file names instead of +those specified. + +The format for running zdiff is: + +@example +zdiff [@var{options}] @var{file1} [@var{file2}] +@end example + +@noindent +This compares @var{file1} to @var{file2}. The standard input is used only if +@var{file1} or @var{file2} refers to standard input. If @var{file2} is +omitted zdiff tries the following: + +@itemize - +@item +If @var{file1} is compressed, compares its decompressed contents with +the corresponding uncompressed file (the name of @var{file1} with the +extension removed). +@item +If @var{file1} is uncompressed, compares it with the decompressed +contents of @var{file1}.[lz|bz2|gz|xz] (the first one that is found). +@end itemize + +@noindent +An exit status of 0 means no differences were found, 1 means some +differences were found, and 2 means trouble. + +zdiff supports the following options (some options only work if the diff +program used supports them): + +@table @code +@item -a +@itemx --text +Treat all files as text. + +@item -b +@itemx --ignore-space-change +Ignore changes in the amount of white space. + +@item -B +@itemx --ignore-blank-lines +Ignore changes whose lines are all blank. 
+ +@item -c +Use the context output format. + +@item -C @var{n} +@itemx --context=@var{n} +Same as -c but use @var{n} lines of context. + +@item -d +@itemx --minimal +Try hard to find a smaller set of changes. + +@item -E +@itemx --ignore-tab-expansion +Ignore changes due to tab expansion. + +@item -i +@itemx --ignore-case +Ignore case differences in file contents. + +@item -O [@var{format1}][,@var{format2}] +@itemx --force-format=[@var{format1}][,@var{format2}] +Force the compressed formats given. Any of @var{format1} or +@var{format2} may be omitted and the corresponding format will be +automatically detected. Valid values for @var{format} are @samp{bz2}, +@samp{gz}, @samp{lz}, and @samp{xz}. If at least one format is specified +with this option, the file is passed to the corresponding decompressor +without verifying its format, and the exact file names of both +@var{file1} and @var{file2} must be given. Other names won't be tried. + +@item -p +@itemx --show-c-function +Show which C function each change is in. + +@item -q +@itemx --brief +Output only whether files differ. + +@item -s +@itemx --report-identical-files +Report when two files are identical. + +@item -t +@itemx --expand-tabs +Expand tabs to spaces in output. + +@item -T +@itemx --initial-tab +Make tabs line up by prepending a tab. + +@item -u +Use the unified output format. + +@item -U @var{n} +@itemx --unified=@var{n} +Same as -u but use @var{n} lines of context. + +@item -w +@itemx --ignore-all-space +Ignore all white space. + +@end table + + +@node Zgrep +@chapter Zgrep +@cindex zgrep + +zgrep is a front end to the program grep that allows transparent search +on any combination of compressed and uncompressed files. If any file +given is compressed, its decompressed content is used. If a file given +does not exist, and its name does not end with one of the known +extensions, zgrep tries the compressed file names corresponding to the +formats supported. 
If a file fails to decompress, zgrep continues +searching the rest of the files. + +If a file is specified as @samp{-}, data are read from standard input, +decompressed if needed, and fed to grep. Data read from standard input +must be of the same type; all uncompressed or all in the same +compressed format. + +If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + +The format for running zgrep is: + +@example +zgrep [@var{options}] @var{pattern} [@var{files}] +@end example + +@noindent +An exit status of 0 means at least one match was found, 1 means no +matches were found, and 2 means trouble. + +zgrep supports the following options (some options only work if the grep +program used supports them): + +@table @code +@item -a +@itemx --text +Treat all files as text. + +@item -A @var{n} +@itemx --after-context=@var{n} +Print @var{n} lines of trailing context. + +@item -b +@itemx --byte-offset +Print the byte offset of each line. + +@item -B @var{n} +@itemx --before-context=@var{n} +Print @var{n} lines of leading context. + +@item -c +@itemx --count +Only print a count of matching lines per file. + +@item -C @var{n} +@itemx --context=@var{n} +Print @var{n} lines of output context. + +@item --color[=@var{when}] +Show matched strings in color. @var{when} is @samp{never}, @samp{always}, +or @samp{auto}. + +@item -e @var{pattern} +@itemx --regexp=@var{pattern} +Use @var{pattern} as the pattern to match. + +@item -E +@itemx --extended-regexp +Treat @var{pattern} as an extended regular expression. 
+ +@item -f @var{file} +@itemx --file=@var{file} +Obtain patterns from @var{file}, one per line.@* +When searching in several files at once, command substitution can be +used with @samp{-e} to read @var{file} only once, for example if +@var{file} is not a regular file: +@w{@samp{zgrep -e "$(cat @var{file})" file1.lz file2.gz}} + +@item -F +@itemx --fixed-strings +Treat @var{pattern} as a set of newline-separated strings. + +@item -h +@itemx --no-filename +Suppress the prefixing of file names on output when multiple files are +searched. + +@item -H +@itemx --with-filename +Print the file name for each match. + +@item -i +@itemx --ignore-case +Ignore case distinctions. + +@item -I +Ignore binary files. + +@item -l +@itemx --files-with-matches +Only print names of files containing at least one match. + +@item -L +@itemx --files-without-match +Only print names of files not containing any matches.@* +Note: option -L fails (prints wrong results, returns wrong status, and even +hangs) when using GNU grep versions 3.2 to 3.4 inclusive because of a wrong +change in the exit status of grep, which was reverted in GNU grep 3.5. + +@item -m @var{n} +@itemx --max-count=@var{n} +Stop after @var{n} matches. + +@item -n +@itemx --line-number +Prefix each matched line with its line number in the input file. + +@item -o +@itemx --only-matching +Show only the part of matching lines that actually matches @var{pattern}. + +@item -O @var{format} +@itemx --force-format=@var{format} +Force the compressed format given. Valid values for @var{format} are +@samp{bz2}, @samp{gz}, @samp{lz}, and @samp{xz}. If this option is used, +the files are passed to the corresponding decompressor without verifying +their format, and the exact file name must be given. Other names won't +be tried. + +@item -q +@itemx --quiet +Suppress all messages. Exit immediately with zero status if any match is +found, even if an error was detected. 
+ +@item -r +@itemx --recursive +For each directory operand, read and process all files in that directory, +recursively. Follow symbolic links given in the command line, but skip +symbolic links that are encountered recursively. + +@item -R +@itemx --dereference-recursive +For each directory operand, read and process all files in that directory, +recursively, following all symbolic links. + +@item -s +@itemx --no-messages +Suppress error messages about nonexistent or unreadable files. + +@item -v +@itemx --invert-match +Select non-matching lines. + +@item --verbose +Verbose mode. Show error messages. + +@item -w +@itemx --word-regexp +Match only whole words. + +@item -x +@itemx --line-regexp +Match only whole lines. + +@end table + + +@node Ztest +@chapter Ztest +@cindex ztest + +ztest verifies the integrity of the compressed files specified. +Uncompressed files are ignored. If a file is specified as @samp{-}, the +integrity of compressed data read from standard input is verified. Data +read from standard input must be all in the same compressed format. If +a file fails to decompress, does not exist, can't be opened, or is a +terminal, ztest continues verifying the rest of the files. A final +diagnostic is shown at verbosity level 1 or higher if any file fails the +test when testing multiple files. + +If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + +Note that error detection in the xz format is broken. First, some xz +files lack integrity information. Second, not all xz decompressors can +@uref{http://www.nongnu.org/lzip/xz_inadequate.html#fragmented,,verify the integrity} +of all xz files. Third, section 2.1.1.2 'Stream Flags' of the +@uref{http://tukaani.org/xz/xz-file-format.txt,,xz format specification} +allows xz decompressors to produce garbage output without issuing any +warning. Therefore, xz files can't always be verified as reliably as +files in the other formats can. 
+@c We can only hope that xz is soon abandoned. + +The format for running ztest is: + +@example +ztest [@var{options}] [@var{files}] +@end example + +@noindent +The exit status is 0 if all compressed files verify OK, 1 if +environmental problems (file not found, invalid flags, I/O errors, etc), +2 if any compressed file is corrupt or invalid. + +ztest supports the following options: + +@table @code +@item -O @var{format} +@itemx --force-format=@var{format} +Force the compressed format given. Valid values for @var{format} are +@samp{bz2}, @samp{gz}, @samp{lz}, and @samp{xz}. If this option is used, the +files are passed to the corresponding decompressor without verifying their +format, and any files in a format that the decompressor can't understand +will fail. For example, @samp{--force-format=gz} can test gzipped (.gz) and +compress'd (.Z) files if the compressor used is GNU gzip. + +@item -q +@itemx --quiet +Quiet operation. Suppress all messages. + +@item -r +@itemx --recursive +For each directory operand, read and process all files in that directory, +recursively. Follow symbolic links given in the command line, but skip +symbolic links that are encountered recursively. + +@item -R +@itemx --dereference-recursive +For each directory operand, read and process all files in that directory, +recursively, following all symbolic links. + +@item -v +@itemx --verbose +Verbose mode. Show the verify status for each file processed.@* +Further -v's increase the verbosity level. + +@end table + + +@node Zupdate +@chapter Zupdate +@cindex zupdate + +zupdate recompresses files from bzip2, gzip, and xz formats to lzip format. +Each original is compared with the new file and then deleted. Only regular +files with standard file name extensions are recompressed, other files are +ignored. Compressed files are decompressed and then recompressed on the fly; +no temporary files are created. 
If an error happens while recompressing a +file, zupdate exits immediately without recompressing the rest of the files. +The lzip format is chosen as destination because it is the most appropriate +for long-term data archiving. + +If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches do nothing. + +If the lzip compressed version of a file already exists, the file is +skipped unless the option @samp{--force} is given. In this case, if the +comparison with the existing lzip version fails, an error is returned +and the original file is not deleted. The operation of zupdate is meant +to be safe and not cause any data loss. Therefore, existing lzip +compressed files are never overwritten nor deleted. + +Combining the options @samp{--force} and @samp{--keep}, as in +@w{@samp{zupdate -f -k *.gz}}, verifies that there are no differences +between each pair of files in a multiformat set of files. + +The names of the original files must have one of the following extensions:@* +@samp{.bz2}, @samp{.gz}, or @samp{.xz}, which are recompressed to +@samp{.lz};@* +@samp{.tbz}, @samp{.tbz2}, @samp{.tgz}, or @samp{.txz}, which are +recompressed to @samp{.tlz}.@* +Keeping the combined extensions (@samp{.tgz} --> @samp{.tlz}) may be useful +when recompressing Slackware packages, for example. + +Recompressing a file is much like copying or moving it; therefore zupdate +preserves the access and modification dates, permissions, and, when +possible, ownership of the file just as @samp{cp -p} does. (If the user ID or +the group ID can't be duplicated, the file permission bits S_ISUID and +S_ISGID are cleared). + +The format for running zupdate is: + +@example +zupdate [@var{options}] [@var{files}] +@end example + +@noindent +Exit status is 0 if all the compressed files were successfully recompressed +(if needed), compared, and deleted (if requested). Non-zero otherwise. 
+ +zupdate supports the following options: + +@table @code +@item -f +@itemx --force +Don't skip a file for which a lzip compressed version already exists. +@samp{--force} compares the content of the input file with the content +of the existing lzip file and deletes the input file if both contents +are identical. + +@item -k +@itemx --keep +Keep (don't delete) the input file after comparing it with the lzip file. + +@item -l +@itemx --lzip-verbose +Pass one option @samp{-v} to the lzip compressor so that it shows the +compression ratio for each file processed. Using lzip 1.15 or newer, a +second @samp{-l} shows the progress of compression. Use it together with +@samp{-v} to see the name of the file. + +@item -q +@itemx --quiet +Quiet operation. Suppress all messages. + +@item -r +@itemx --recursive +For each directory operand, read and process all files in that directory, +recursively. Follow symbolic links given in the command line, but skip +symbolic links that are encountered recursively. + +@item -R +@itemx --dereference-recursive +For each directory operand, read and process all files in that directory, +recursively, following all symbolic links. + +@item -v +@itemx --verbose +Verbose mode. Show the files being processed. A second @samp{-v} also +shows the files being ignored. + +@item -0 .. -9 +Set the compression level of lzip. By default zupdate passes @samp{-9} to +lzip. Custom compression options can be passed to lzip with the option +@samp{--lz}. For example @w{@samp{--lz='lzip -9 -s64MiB'}}. + +@end table + + +@node Problems +@chapter Reporting bugs +@cindex bugs +@cindex getting help + +There are probably bugs in zutils. There are certainly errors and +omissions in this manual. If you report them, they will get fixed. If +you don't, no one will ever know about them and they will remain unfixed +for all eternity, if not longer. + +If you find a bug in zutils, please send electronic mail to +@email{zutils-bug@@nongnu.org}. 
Include the version number, which you can +find by running @w{@samp{zupdate --version}}. + + +@node Concept index +@unnumbered Concept index + +@printindex cp + +@bye diff --git a/rc.cc b/rc.cc new file mode 100644 index 0000000..0162510 --- /dev/null +++ b/rc.cc @@ -0,0 +1,411 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arg_parser.h" +#include "rc.h" + + +const char * invocation_name = 0; +const char * program_name = 0; +int verbosity = 0; + +namespace { + +const char * const config_file_name = "zutilsrc"; +const char * const program_year = "2021"; + +std::string compressor_names[num_formats] = + { "bzip2", "gzip", "lzip", "xz" }; // default compressor names + +// args to compressors read from rc or from options --[bglx]z, maybe empty +std::vector< std::string > compressor_args[num_formats]; + +// vector of enabled formats plus [num_formats] for uncompressed. +// empty means all enabled. 
+std::vector< bool > enabled_formats; + +const struct { const char * from; const char * to; int format_index; } + known_extensions[] = { + { ".bz2", "", fmt_bz2 }, + { ".tbz", ".tar", fmt_bz2 }, + { ".tbz2", ".tar", fmt_bz2 }, + { ".gz", "", fmt_gz }, + { ".tgz", ".tar", fmt_gz }, + { ".lz", "", fmt_lz }, + { ".tlz", ".tar", fmt_lz }, + { ".xz", "", fmt_xz }, + { ".txz", ".tar", fmt_xz }, + { 0, 0, -1 } }; + + +int my_fgetc( FILE * const f ) + { + int ch; + bool comment = false; + + do { + ch = std::fgetc( f ); + if( ch == '#' ) comment = true; + else if( ch == '\n' || ch == EOF ) comment = false; + else if( ch == '\\' && comment ) + { + const int c = std::fgetc( f ); + if( c == '\n' ) { std::ungetc( c, f ); comment = false; } + } + } + while( comment ); + return ch; + } + + +// Returns the parity of escapes (backslashes) at the end of a string. +bool trailing_escape( const std::string & s ) + { + unsigned len = s.size(); + bool odd_escape = false; + while( len > 0 && s[--len] == '\\' ) odd_escape = !odd_escape; + return odd_escape; + } + + +/* Read a line discarding comments, leading whitespace, and blank lines. + Escaped newlines are discarded. + Returns the empty string if at EOF. 
+*/ +const std::string & my_fgets( FILE * const f, int & linenum ) + { + static std::string s; + bool strip = true; // strip leading whitespace + s.clear(); + + while( true ) + { + int ch = my_fgetc( f ); + if( strip ) + { + strip = false; + while( std::isspace( ch ) ) + { if( ch == '\n' ) { ++linenum; } ch = my_fgetc( f ); } + } + if( ch == EOF ) { if( s.size() ) { ++linenum; } break; } + else if( ch == '\n' ) + { + ++linenum; strip = true; + if( trailing_escape( s ) ) s.erase( s.size() - 1 ); + else if( s.size() ) break; + } + else s += ch; + } + return s; + } + + +bool parse_compressor_command( const std::string & s, int i, + const int format_index ) + { + const int len = s.size(); + while( i < len && std::isspace( s[i] ) ) ++i; // strip spaces + int l = i; + while( i < len && !std::isspace( s[i] ) ) ++i; + if( l >= i || s[l] == '-' ) return false; + compressor_names[format_index].assign( s, l, i - l ); + + compressor_args[format_index].clear(); + while( i < len ) + { + while( i < len && std::isspace( s[i] ) ) ++i; // strip spaces + l = i; + while( i < len && !std::isspace( s[i] ) ) ++i; + if( l < i ) + compressor_args[format_index].push_back( std::string( s, l, i - l ) ); + } + return true; + } + + +bool parse_rc_line( const std::string & line, + const char * const filename, const int linenum ) + { + const int len = line.size(); + int i = 0; + while( i < len && std::isspace( line[i] ) ) ++i; // strip spaces + int l = i; + while( i < len && line[i] != '=' && !std::isspace( line[i] ) ) ++i; + if( l >= i ) + { if( verbosity >= 0 ) + std::fprintf( stderr, "%s %d: missing format name.\n", filename, linenum ); + return false; } + const std::string name( line, l, i - l ); + int format_index = -1; + for( int j = 0; j < num_formats; ++j ) + if( name == format_names[j] ) { format_index = j; break; } + if( format_index < 0 ) + { if( verbosity >= 0 ) + std::fprintf( stderr, "%s %d: bad format name '%s'\n", + filename, linenum, name.c_str() ); + return false; } + + while( i 
< len && std::isspace( line[i] ) ) ++i; // strip spaces + if( i <= 0 || i >= len || line[i] != '=' ) + { if( verbosity >= 0 ) + std::fprintf( stderr, "%s %d: missing '='\n", filename, linenum ); + return false; } + ++i; // skip the '=' + if( !parse_compressor_command( line, i, format_index ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s %d: missing compressor name.\n", filename, linenum ); + return false; + } + return true; + } + + + // Returns 0 for success, 1 for file not found, 2 for syntax error. +int process_rcfile( const std::string & name ) + { + FILE * const f = std::fopen( name.c_str(), "r" ); + if( !f ) return 1; + + int linenum = 0; + int retval = 0; + + while( true ) + { + const std::string & line = my_fgets( f, linenum ); + if( line.empty() ) break; // EOF + if( !parse_rc_line( line, name.c_str(), linenum ) ) + { retval = 2; break; } + } + std::fclose( f ); + return retval; + } + +} // end namespace + + +bool enabled_format( const int format_index ) + { + if( enabled_formats.size() <= num_formats ) return true; // all enabled + if( format_index < 0 ) return enabled_formats[num_formats]; // uncompressed + return enabled_formats[format_index]; + } + + +void parse_format_list( const std::string & arg ) + { + const std::string un( "uncompressed" ); + bool error = arg.empty(); + enabled_formats.assign( num_formats + 1, false ); + + for( unsigned l = 0, r; l < arg.size(); l = r + 1 ) + { + r = std::min( arg.find( ',', l ), arg.size() ); + if( l >= r ) { error = true; break; } // empty format + int format_index = num_formats; + const std::string s( arg, l, r - l ); + for( int i = 0; i < num_formats; ++i ) + if( s == format_names[i] ) + { format_index = i; break; } + if( format_index == num_formats && un.find( s ) != 0 ) + { error = true; break; } + enabled_formats[format_index] = true; + } + if( error ) + { show_error( "Bad argument for option '--format'." 
); std::exit( 1 ); } + } + + +int parse_format_type( const std::string & arg ) + { + for( int i = 0; i < num_formats; ++i ) + if( arg == format_names[i] ) + return i; + show_error( "Bad argument for option '--force-format'." ); + std::exit( 1 ); + } + + +int extension_index( const std::string & name ) + { + for( int eindex = 0; known_extensions[eindex].from; ++eindex ) + { + const std::string ext( known_extensions[eindex].from ); + if( name.size() > ext.size() && + name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 ) + return eindex; + } + return -1; + } + +int extension_format( const int eindex ) + { return ( eindex >= 0 ) ? known_extensions[eindex].format_index : -1; } + +const char * extension_from( const int eindex ) + { return known_extensions[eindex].from; } + +const char * extension_to( const int eindex ) + { return known_extensions[eindex].to; } + + +void maybe_process_config_file( const Arg_parser & parser ) + { + for( int i = 0; i < parser.arguments(); ++i ) + if( parser.code( i ) == 'N' ) return; + std::string name; + const char * p = std::getenv( "HOME" ); if( p ) name = p; + if( name.size() ) + { + name += "/."; name += config_file_name; + const int retval = process_rcfile( name ); + if( retval == 0 ) return; + if( retval == 2 ) std::exit( 2 ); + } + name = SYSCONFDIR; name += '/'; name += config_file_name; + const int retval = process_rcfile( name ); + if( retval == 2 ) std::exit( 2 ); + } + + +void parse_compressor( const std::string & arg, const int format_index, + const int eretval ) + { + if( !parse_compressor_command( arg, 0, format_index ) ) + { show_error( "Missing compressor name." 
); std::exit( eretval ); } + } + + +const char * get_compressor_name( const int format_index ) + { + if( format_index >= 0 && format_index < num_formats && + compressor_names[format_index].size() ) + return compressor_names[format_index].c_str(); + return 0; + } + + +const std::vector< std::string > & get_compressor_args( const int format_index ) + { + return compressor_args[format_index]; + } + + +void show_help_addr() + { + std::printf( "\nReport bugs to zutils-bug@nongnu.org\n" + "Zutils home page: http://www.nongnu.org/zutils/zutils.html\n" ); + } + + +void show_version() + { + std::printf( "%s (zutils) %s\n", program_name, PROGVERSION ); + std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + std::printf( "License GPLv2+: GNU GPL version 2 or later \n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" ); + } + + +void show_error( const char * const msg, const int errcode, const bool help ) + { + if( verbosity < 0 ) return; + if( msg && msg[0] ) + std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? std::strerror( errcode ) : "" ); + if( help ) + std::fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); + } + + +void show_file_error( const char * const filename, const char * const msg, + const int errcode ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? 
std::strerror( errcode ) : "" ); + } + + +void internal_error( const char * const msg ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); + std::exit( 3 ); + } + + +void show_close_error( const char * const prog_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Error closing output of %s: %s\n", + program_name, prog_name, std::strerror( errno ) ); + } + + +void show_exec_error( const char * const prog_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't exec '%s': %s\n", + program_name, prog_name, std::strerror( errno ) ); + } + + +void show_fork_error( const char * const prog_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't fork '%s': %s\n", + program_name, prog_name, std::strerror( errno ) ); + } + + +int wait_for_child( const pid_t pid, const char * const name, + const int eretval, const bool isgzxz ) + { + int status; + while( waitpid( pid, &status, 0 ) == -1 ) + { + if( errno != EINTR ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Error waiting termination of '%s': %s\n", + program_name, name, std::strerror( errno ) ); + _exit( eretval ); + } + } + if( WIFEXITED( status ) ) + { + const int tmp = WEXITSTATUS( status ); + if( isgzxz && eretval == 1 && tmp == 1 ) return 2; // for ztest + return tmp; + } + return eretval; + } diff --git a/rc.h b/rc.h new file mode 100644 index 0000000..38a737e --- /dev/null +++ b/rc.h @@ -0,0 +1,62 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +enum { fmt_bz2, fmt_gz, fmt_lz, fmt_xz, num_formats }; // format_index +const char * const format_names[num_formats] = { "bz2", "gz", "lz", "xz" }; +const char * const simple_extensions[num_formats] = + { ".bz2", ".gz", ".lz", ".xz" }; +const int format_order[num_formats] = + { fmt_lz, fmt_bz2, fmt_gz, fmt_xz }; // search order + +bool enabled_format( const int format_index ); +void parse_format_list( const std::string & arg ); +int parse_format_type( const std::string & arg ); + +int extension_index( const std::string & name ); // -1 if unknown +int extension_format( const int eindex ); // -1 if uncompressed +const char * extension_from( const int eindex ); +const char * extension_to( const int eindex ); + +extern const char * invocation_name; +extern const char * program_name; +extern int verbosity; + +class Arg_parser; + +void maybe_process_config_file( const Arg_parser & parser ); + +void parse_compressor( const std::string & arg, const int format_index, + const int eretval = 2 ); + +const char * get_compressor_name( const int format_index ); +const std::vector< std::string > & get_compressor_args( const int format_index ); + +void show_help_addr(); +void show_version(); +void show_error( const char * const msg, const int errcode = 0, + const bool help = false ); +void show_file_error( const char * const filename, const char * const msg, + const int errcode = 0 ); +void internal_error( const char * const msg ); +void show_close_error( const char * const prog_name = "data feeder" ); +void show_exec_error( const char * const prog_name ); +void show_fork_error( const char * const prog_name ); + +// Returns 
exit status of child process 'pid', or 'eretval' in case of error. +// +int wait_for_child( const pid_t pid, const char * const name, + const int eretval = 2, const bool isgzxz = false ); diff --git a/recursive.cc b/recursive.cc new file mode 100644 index 0000000..a69e117 --- /dev/null +++ b/recursive.cc @@ -0,0 +1,109 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/* Returns true if full_name is a regular file with an enabled extension + or (a link to) a directory. 
*/ +bool test_full_name( const std::string & full_name, const struct stat * stp, + const bool follow ) + { + struct stat st, st2; + if( follow && stat( full_name.c_str(), &st ) != 0 ) return false; + if( !follow && lstat( full_name.c_str(), &st ) != 0 ) return false; + if( S_ISREG( st.st_mode ) ) // regular file + return enabled_format( extension_format( extension_index( full_name ) ) ); + if( !S_ISDIR( st.st_mode ) ) return false; + + std::string prev_dir( full_name ); + bool loop = ( stp && st.st_ino == stp->st_ino && st.st_dev == stp->st_dev ); + if( !loop ) + for( unsigned i = prev_dir.size(); i > 1; ) + { + while( i > 0 && prev_dir[i-1] != '/' ) --i; + if( i == 0 ) break; + if( i > 1 ) --i; // remove trailing slash except at root dir + prev_dir.resize( i ); + if( stat( prev_dir.c_str(), &st2 ) != 0 || !S_ISDIR( st2.st_mode ) || + ( st.st_ino == st2.st_ino && st.st_dev == st2.st_dev ) ) + { loop = true; break; } + } + if( loop ) // full_name already visited or above tree + show_file_error( full_name.c_str(), "warning: Recursive directory loop." ); + return !loop; // (link to) directory + } + + +/* Returns in input_filename the next filename, or "." for stdin. + ("." was chosen because it is not a valid filename). + Sets 'error' to true if a directory fails to open. 
*/ +bool next_filename( std::list< std::string > & filenames, + std::string & input_filename, bool & error, + const int recursive, const bool ignore_stdin = false, + const bool no_messages = false ) + { + while( !filenames.empty() ) + { + input_filename = filenames.front(); + filenames.pop_front(); + if( input_filename == "-" ) + { + if( ignore_stdin ) continue; + input_filename = "."; return true; + } + struct stat st; + if( stat( input_filename.c_str(), &st ) == 0 && S_ISDIR( st.st_mode ) ) + { + if( recursive ) + { + DIR * const dirp = opendir( input_filename.c_str() ); + if( !dirp ) + { + if( !no_messages ) + show_file_error( input_filename.c_str(), "Can't open directory", errno ); + error = true; continue; + } + for( unsigned i = input_filename.size(); + i > 1 && input_filename[i-1] == '/'; --i ) + input_filename.resize( i - 1 ); // remove trailing slashes + struct stat stdot, *stdotp = 0; + if( input_filename[0] != '/' ) // relative path + { + if( input_filename == "." ) input_filename.clear(); + if( stat( ".", &stdot ) == 0 && S_ISDIR( stdot.st_mode ) ) + stdotp = &stdot; + } + if( input_filename.size() && input_filename != "/" ) + input_filename += '/'; + std::list< std::string > tmp_list; + while( true ) + { + const struct dirent * const entryp = readdir( dirp ); + if( !entryp ) { closedir( dirp ); break; } + const std::string tmp_name( entryp->d_name ); + if( tmp_name == "." || tmp_name == ".." ) continue; + const std::string full_name( input_filename + tmp_name ); + if( test_full_name( full_name, stdotp, recursive == 2 ) ) + tmp_list.push_back( full_name ); + } + filenames.splice( filenames.begin(), tmp_list ); + } + continue; + } + return true; + } + input_filename.clear(); + return false; + } diff --git a/testsuite/check.sh b/testsuite/check.sh new file mode 100755 index 0000000..6a6ef38 --- /dev/null +++ b/testsuite/check.sh @@ -0,0 +1,560 @@ +#! 
/bin/sh +# check script for Zutils - Utilities dealing with compressed files +# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# +# This script is free software: you have unlimited permission +# to copy, distribute, and modify it. + +LC_ALL=C +export LC_ALL +objdir=`pwd` +testdir=`cd "$1" ; pwd` +ZCAT="${objdir}"/zcat +ZCMP="${objdir}"/zcmp +ZDIFF="${objdir}"/zdiff +ZGREP="${objdir}"/zgrep +ZEGREP="${objdir}"/zegrep +ZFGREP="${objdir}"/zfgrep +ZTEST="${objdir}"/ztest +ZUPDATE="${objdir}"/zupdate +compressors="bzip2 gzip lzip" +extensions="bz2 gz lz" +compressor_needed() { echo "${compressors} are needed to run tests" ; exit 1 ; } +framework_failure() { echo "failure in testing framework" ; exit 1 ; } + +if [ ! -f "${ZUPDATE}" ] || [ ! -x "${ZUPDATE}" ] ; then + echo "${ZUPDATE}: cannot execute" + exit 1 +fi + +[ -e "${ZUPDATE}" ] 2> /dev/null || + { + echo "$0: a POSIX shell is required to run the tests" + echo "Try bash -c \"$0 $1 $2\"" + exit 1 + } + +if [ -d tmp ] ; then rm -rf tmp ; fi +mkdir tmp +cd "${objdir}"/tmp || framework_failure + +for i in ${compressors}; do + cat "${testdir}"/test.txt > in || framework_failure + $i in || compressor_needed + printf "Hello World!\n" > hello || framework_failure + $i hello || compressor_needed +done + +cat "${testdir}"/test.txt > in || framework_failure +cat "${testdir}"/test.txt.tar > in.tar || framework_failure +printf "01234567890" > pin.tar || framework_failure +cat in.tar in.tar in.tar in.tar >> pin.tar || framework_failure +cat in > -in- || framework_failure +cat in.lz > -in-.lz || framework_failure +cat in.lz > lz_only.lz || framework_failure +cat in in in in in in > in6 || framework_failure +bad0_lz="${testdir}"/zero_bad_crc.lz +bad0_gz="${testdir}"/zero_bad_crc.gz +bad1_lz="${testdir}"/test_bad_crc.lz +touch empty empty.bz2 empty.gz empty.lz +fail=0 +test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } + +printf "testing zcat-%s..." 
"$2" + +for i in ${extensions}; do + "${ZCAT}" -N in.$i > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i + "${ZCAT}" -N empty.$i in.$i > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i + "${ZCAT}" -N --format=un in.$i > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i + "${ZCAT}" -N --force-format=$i in.$i > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i + "${ZCAT}" -N in.$i | dd bs=1000 count=1 > copy 2> /dev/null || + test_failed $LINENO $i + dd if=in bs=1000 count=1 2> /dev/null | cmp - copy || + test_failed $LINENO $i +done + +printf "LZIP\001-.............................." | "${ZCAT}" -N > /dev/null 2>&1 +[ $? = 1 ] || test_failed $LINENO +printf "LZIPxxxxxx" | "${ZCAT}" -N > /dev/null || test_failed $LINENO +printf "BZh9xxxxxx" | "${ZCAT}" -N > /dev/null || test_failed $LINENO +"${ZCAT}" -N -v -s "${testdir}"/zcat_vs.dat > /dev/null || test_failed $LINENO +"${ZCAT}" -N < in > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N < in.gz > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N < in.bz2 > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N < in.lz > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N -O lz - - < in.lz > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N --lz='lzip -q' < in.lz > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N in > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N lz_only > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +cat in.lz in in in in | "${ZCAT}" -N > copy || test_failed $LINENO # tdata +cmp in copy || test_failed $LINENO +"${ZCAT}" -N in in.gz in.bz2 in.lz -- -in- -in-.lz > copy || test_failed $LINENO +cmp in6 copy || test_failed $LINENO +"${ZCAT}" -Nq in in.gz in.bz2 in.lz "${bad0_lz}" -- -in- -in-.lz > copy +[ $? 
= 1 ] || test_failed $LINENO +cmp in6 copy || test_failed $LINENO +"${ZCAT}" -Nq "${bad1_lz}" -- -in-.lz in in.gz in.bz2 in.lz > copy +[ $? = 1 ] || test_failed $LINENO +cmp in6 copy || test_failed $LINENO +"${ZCAT}" -N . || test_failed $LINENO +"${ZCAT}" -N -r . > /dev/null || test_failed $LINENO +"${ZCAT}" -N -r > /dev/null || test_failed $LINENO +"${ZCAT}" -N -R . > /dev/null || test_failed $LINENO +"${ZCAT}" -N -R > /dev/null || test_failed $LINENO + +"${ZCAT}" -Nq "" < in.lz > /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=, in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=,lz in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=gz,,lz in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=lz,, in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=nc in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --lz='-lzip -q' in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -N --force-format=gz in.bz2 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -N --force-format=bz2 in.lz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --force-format=lz in.gz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -N --bad-option 2> /dev/null +[ $? = 1 ] || test_failed $LINENO + + +printf "\ntesting zcmp-%s..." "$2" + +for i in ${extensions}; do + "${ZCMP}" -N in.$i || test_failed $LINENO $i + "${ZCMP}" -N in in.$i || test_failed $LINENO $i + "${ZCMP}" -N in in.$i --force-format=,$i || test_failed $LINENO $i + "${ZCMP}" -N in.$i in || test_failed $LINENO $i + "${ZCMP}" -N in.$i in --force-format=$i || test_failed $LINENO $i + "${ZCMP}" -N -i 1kB:1000 -n 500 in6 in.$i || test_failed $LINENO $i + "${ZCMP}" -N -i 1KiB:1024 -n 50 in.$i in6 || test_failed $LINENO $i + "${ZCMP}" -N empty empty.$i || test_failed $LINENO $i +done + +"${ZCMP}" -Nq in in6 +[ $? 
= 1 ] || test_failed $LINENO +"${ZCMP}" -N -n 0 in in6 || test_failed $LINENO +"${ZCMP}" -N -n 100B in in6 || test_failed $LINENO +"${ZCMP}" -N -n 1k in in6 || test_failed $LINENO +"${ZCMP}" -N -n 10kB in in6 || test_failed $LINENO +"${ZCMP}" -Nq in.tar pin.tar +[ $? = 1 ] || test_failed $LINENO +"${ZCMP}" -Nq -i 0B:11B in.tar pin.tar +[ $? = 1 ] || test_failed $LINENO +"${ZCMP}" -N -i 0:11 -n 0 in.tar pin.tar || test_failed $LINENO +"${ZCMP}" -N -i 0:11 -n 100 in.tar pin.tar || test_failed $LINENO +"${ZCMP}" -N -i 0:11 -n 1Ki in.tar pin.tar || test_failed $LINENO +"${ZCMP}" -N -i 0:11 -n 10KiB in.tar pin.tar || test_failed $LINENO +"${ZCMP}" -N - - || test_failed $LINENO +"${ZCMP}" -Nq - +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N in in || test_failed $LINENO +"${ZCMP}" -N in || test_failed $LINENO +"${ZCMP}" -N --format=gz,bz2 in || test_failed $LINENO +"${ZCMP}" -N --format=gz in || test_failed $LINENO +"${ZCMP}" -N in.lz in.gz || test_failed $LINENO +"${ZCMP}" -N --lz='lzip -q' in.lz in.gz || test_failed $LINENO +"${ZCMP}" -N in.gz -- -in-.lz || test_failed $LINENO +"${ZCMP}" -N -- -in-.lz in.gz || test_failed $LINENO +"${ZCMP}" -N in -- -in-.lz || test_failed $LINENO +"${ZCMP}" -N -- -in- in.lz || test_failed $LINENO +"${ZCMP}" -N in.lz -- -in- || test_failed $LINENO +"${ZCMP}" -N -- -in-.lz in || test_failed $LINENO +"${ZCMP}" -N -- -in- in || test_failed $LINENO +"${ZCMP}" -N in -- -in- || test_failed $LINENO +"${ZCMP}" -N in.lz - < in || test_failed $LINENO +"${ZCMP}" -N - in.lz < in || test_failed $LINENO +"${ZCMP}" -N in - < in.lz || test_failed $LINENO +"${ZCMP}" -N - in < in.lz || test_failed $LINENO +"${ZCMP}" -N lz_only.lz - < in || test_failed $LINENO +"${ZCMP}" -Nq lz_only.lz +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq "" in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq --force-format=lz in.lz +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq --force-format=lz in.gz in.lz +r=$? 
+{ [ $r = 1 ] || [ $r = 2 ] ; } || test_failed $LINENO +"${ZCMP}" -Nq -i 100BB in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq -i 100BB:100 in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq -i 100: in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq -n -1 in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N -q -n 100BB in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N --bad-option in in 2> /dev/null +[ $? = 2 ] || test_failed $LINENO + + +printf "\ntesting zdiff-%s..." "$2" + +for i in ${extensions}; do + "${ZDIFF}" -N in.$i > /dev/null || test_failed $LINENO $i + "${ZDIFF}" -N in in.$i > /dev/null || test_failed $LINENO $i + "${ZDIFF}" -N in in.$i --force-format=,$i > /dev/null || + test_failed $LINENO $i + "${ZDIFF}" -N in.$i in > /dev/null || test_failed $LINENO $i + "${ZDIFF}" -N in.$i in --force-format=$i, > /dev/null || + test_failed $LINENO $i + "${ZDIFF}" -N empty empty.$i > /dev/null || test_failed $LINENO $i +done + +"${ZDIFF}" -N in in6 > /dev/null +[ $? = 1 ] || test_failed $LINENO +# GNU diff 3.0 returns 2 when binary files differ +"${ZDIFF}" -N in.tar pin.tar > /dev/null && test_failed $LINENO +"${ZDIFF}" -N - - || test_failed $LINENO +"${ZDIFF}" -N - 2> /dev/null +[ $? 
= 2 ] || test_failed $LINENO +"${ZDIFF}" -N in in || test_failed $LINENO +"${ZDIFF}" -N in || test_failed $LINENO +"${ZDIFF}" -N --format=gz,bz2 in || test_failed $LINENO +"${ZDIFF}" -N --format=gz in || test_failed $LINENO +"${ZDIFF}" -N in.lz in.gz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N --lz='lzip -q' in.lz in.gz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in.gz -- -in-.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N -- -in-.lz in.gz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in -- -in-.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N -- -in- in.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in.lz -- -in- > /dev/null || test_failed $LINENO +"${ZDIFF}" -N -- -in-.lz in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N -- -in- in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in -- -in- > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in.lz - < in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N - in.lz < in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in - < in.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N - in < in.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N lz_only.lz - < in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N lz_only.lz 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N "" in 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --bz2='-bzip2' in.bz2 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -Nq --force-format=bz2 in.bz2 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N -q --force-format=,lz in.lz in.bz2 > /dev/null 2>&1 +r=$? +{ [ $r = 1 ] || [ $r = 2 ] ; } || test_failed $LINENO +"${ZDIFF}" -N --bad-option 2> /dev/null +[ $? = 2 ] || test_failed $LINENO + +mkdir tmp2 +cat in > tmp2/a || framework_failure +cat in.lz > tmp2/a.lz || framework_failure +"${ZDIFF}" -N --format=bz2 tmp2/a 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --format=gz tmp2/a 2> /dev/null +[ $? 
= 2 ] || test_failed $LINENO +"${ZDIFF}" -N --format=lz tmp2/a.lz 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --format=lz tmp2/a || test_failed $LINENO +"${ZDIFF}" -N --format=un tmp2/a.lz || test_failed $LINENO +rm -r tmp2 || framework_failure + + +printf "\ntesting zgrep-%s..." "$2" + +for i in ${extensions}; do + "${ZGREP}" -N "GNU" in.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N "GNU" in.$i hello.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N "GNU" hello.$i in.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N -q "GNU" in.$i hello.$i || test_failed $LINENO $i + "${ZGREP}" -N -q "GNU" hello.$i in.$i || test_failed $LINENO $i + "${ZGREP}" -N "GNU" < in.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N -l "GNU" in.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N -L "GNU" in.$i || test_failed $LINENO $i + "${ZGREP}" -N --force-format=$i "GNU" in.$i > /dev/null || + test_failed $LINENO $i + "${ZGREP}" -N -v "nx_pattern" in.$i > /dev/null || + test_failed $LINENO $i + "${ZGREP}" -N "nx_pattern" in.$i && test_failed $LINENO $i + "${ZGREP}" -N -l "nx_pattern" in.$i && test_failed $LINENO $i + "${ZGREP}" -N -L "nx_pattern" in.$i > /dev/null && + test_failed $LINENO $i + "${ZGREP}" -N --force-format=$i "GNU" in 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + "${ZGREP}" -N "nx_pattern" empty.$i && test_failed $LINENO $i +done + +"${ZGREP}" -N pin.tar -e "GNU" > /dev/null || test_failed $LINENO +"${ZGREP}" -N "GNU" < pin.tar > /dev/null || test_failed $LINENO +"${ZGREP}" -N -r "GNU" . > /dev/null || test_failed $LINENO +"${ZGREP}" -N -r "GNU" > /dev/null || test_failed $LINENO +"${ZGREP}" -N -R "GNU" . > /dev/null || test_failed $LINENO +"${ZGREP}" -N -R "GNU" > /dev/null || test_failed $LINENO +"${ZGREP}" -N "nx_pattern" -r . 
in > /dev/null && test_failed $LINENO +"${ZGREP}" -N -e "GNU" in > /dev/null || test_failed $LINENO +"${ZGREP}" -N "GNU" < in > /dev/null || test_failed $LINENO +"${ZGREP}" -N -O lz "nx_pattern" - - < in.lz > /dev/null && test_failed $LINENO +"${ZGREP}" -N -e "-free" --lz='lzip -q' < in.lz > /dev/null || + test_failed $LINENO +"${ZGREP}" -N -- "-free" -in- > /dev/null || test_failed $LINENO +"${ZGREP}" -N -q -- "-free" nx_file -in-.lz || test_failed $LINENO +"${ZGREP}" -N "GNU" in in.gz in.bz2 in.lz -- -in- > /dev/null || + test_failed $LINENO +"${ZGREP}" -N -l "GNU" in in.gz in.bz2 in.lz -- -in- > /dev/null || + test_failed $LINENO +"${ZGREP}" -N -L "GNU" in in.gz in.bz2 in.lz -- -in- || test_failed $LINENO +"${ZGREP}" -N -l "nx_pattern" in in.gz in.bz2 in.lz -- -in- && + test_failed $LINENO +"${ZGREP}" -N -L "nx_pattern" in in.gz in.bz2 in.lz -- -in- > /dev/null && + test_failed $LINENO +"${ZGREP}" -Nq -l "01234567890" in "${bad1_lz}" in.lz && test_failed $LINENO +"${ZGREP}" -Nq -l "01234567890" in "${bad1_lz}" in.lz pin.tar > /dev/null || + test_failed $LINENO + +"${ZGREP}" -N "GNU" . +[ $? = 1 ] || test_failed $LINENO +"${ZGREP}" -N "GNU" "" < in.lz 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZGREP}" -N --bad-option 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZGREP}" -N "GNU" -s nx_file +[ $? = 2 ] || test_failed $LINENO +"${ZGREP}" -N -q +[ $? = 2 ] || test_failed $LINENO + +"${ZEGREP}" -N "GNU" in > /dev/null || test_failed $LINENO +"${ZFGREP}" -N "GNU" in > /dev/null || test_failed $LINENO + + +printf "\ntesting ztest-%s..." "$2" + +for i in ${extensions}; do + "${ZTEST}" -N --force-format=$i < in.$i || test_failed $LINENO $i + "${ZTEST}" -N --force-format=$i < in 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + "${ZTEST}" -N --force-format=$i in 2> /dev/null + [ $? 
= 2 ] || test_failed $LINENO $i +done + +"${ZTEST}" -N in in.gz in.bz2 in.lz -- -in- || test_failed $LINENO +"${ZTEST}" -N < in.gz || test_failed $LINENO +"${ZTEST}" -N < in.bz2 || test_failed $LINENO +"${ZTEST}" -N < in.lz || test_failed $LINENO +"${ZTEST}" -N - < in.lz || test_failed $LINENO +"${ZTEST}" -N - in.gz - < in.lz || test_failed $LINENO +"${ZTEST}" -N --lz='lzip -q' < in.lz || test_failed $LINENO +"${ZTEST}" -N -r . || test_failed $LINENO +"${ZTEST}" -N -r || test_failed $LINENO +"${ZTEST}" -N -R . || test_failed $LINENO +"${ZTEST}" -N -R || test_failed $LINENO +"${ZTEST}" -N empty empty.bz2 empty.gz empty.lz || test_failed $LINENO + +"${ZTEST}" -Nq in.gz "${bad0_lz}" in.bz2 "${bad1_lz}" in.lz +[ $? = 2 ] || test_failed $LINENO +lines=`"${ZTEST}" -N in.gz "${bad0_lz}" in.bz2 "${bad1_lz}" in.lz 2>&1 | wc -l` +[ "${lines}" -eq 2 ] || test_failed $LINENO "${lines}" +lines=`"${ZTEST}" -Nv in.gz "${bad0_lz}" in.bz2 "${bad1_lz}" in.lz 2>&1 | wc -l` +[ "${lines}" -eq 6 ] || test_failed $LINENO "${lines}" +"${ZTEST}" -Nq < in +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq "" < in.lz +[ $? = 1 ] || test_failed $LINENO +dd if=in.lz bs=1000 count=1 2> /dev/null | "${ZTEST}" -N -q +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq --force-format=lz in.bz2 +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -N --lz='lzip --bad-option' in.lz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZTEST}" -N --bad-option 2> /dev/null +[ $? = 1 ] || test_failed $LINENO + + +printf "\ntesting zupdate-%s..." "$2" + +"${ZUPDATE}" -N "" || test_failed $LINENO +cat in.bz2 > a.bz2 || framework_failure +cat in.gz > a.gz || framework_failure +"${ZUPDATE}" -Nq --bz2=bad_command a.bz2 +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -N --bz2='bzip2 --bad-option' a.bz2 > /dev/null 2>&1 +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -Nq --gz=bad_command a.gz +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -N --gz='gzip --bad-option' a.gz 2> /dev/null +[ $? 
= 1 ] || test_failed $LINENO +"${ZUPDATE}" -Nq --lz=bad_command a.gz +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -N --lz='lzip --bad-option' a.gz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -N --bad-option 2> /dev/null +[ $? = 1 ] || test_failed $LINENO + +cat in.lz in.lz > a.lz || framework_failure +"${ZUPDATE}" -Nq -f a.bz2 a.gz +[ $? = 1 ] || test_failed $LINENO +[ -e a.bz2 ] || test_failed $LINENO +[ -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +"${ZUPDATE}" -N a.bz2 || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure +"${ZUPDATE}" -N a.gz || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ ! -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +cat in.gz > a.gz || framework_failure +"${ZUPDATE}" -Nq a.bz2 a.gz +[ $? = 1 ] || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +cat in.gz > a.gz || framework_failure +"${ZUPDATE}" -N -f -k a.bz2 a.gz || test_failed $LINENO +[ -e a.bz2 ] || test_failed $LINENO +[ -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +cat in.gz > a.gz || framework_failure +"${ZUPDATE}" -N -f a.bz2 a.gz || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ ! -e a.gz ] || test_failed $LINENO +[ ! 
-e a ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.bz2 > a.tbz || framework_failure # keep combined extensions +cat in.bz2 > b.tbz2 || framework_failure +cat in.gz > c.tgz || framework_failure +"${ZUPDATE}" -N a.tbz b.tbz2 c.tgz || test_failed $LINENO +[ ! -e a.tbz ] || test_failed $LINENO +[ ! -e b.tbz2 ] || test_failed $LINENO +[ ! -e c.tgz ] || test_failed $LINENO +[ ! -e a ] || test_failed $LINENO +[ ! -e b ] || test_failed $LINENO +[ ! -e c ] || test_failed $LINENO +[ ! -e a.lz ] || test_failed $LINENO +[ ! -e b.lz ] || test_failed $LINENO +[ ! -e c.lz ] || test_failed $LINENO +[ -e a.tlz ] || test_failed $LINENO +[ -e b.tlz ] || test_failed $LINENO +[ -e c.tlz ] || test_failed $LINENO +rm -f a.tlz b.tlz c.tlz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +cat "${bad0_gz}" > b.gz || framework_failure +cat in.gz > c.gz || framework_failure +"${ZUPDATE}" -N -f a.bz2 b.gz c.gz 2> /dev/null && test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ -e b.gz ] || test_failed $LINENO +[ -e c.gz ] || test_failed $LINENO +[ ! -e a ] || test_failed $LINENO +[ ! -e b ] || test_failed $LINENO +[ ! -e c ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz b.gz c.gz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +"${ZUPDATE}" -N -1 -q a.bz2 || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.gz > 'name with spaces.gz' || framework_failure +"${ZUPDATE}" -N -1 -q 'name with spaces.gz' || test_failed $LINENO +[ ! 
-e 'name with spaces.gz' ] || test_failed $LINENO +[ -e 'name with spaces.lz' ] || test_failed $LINENO +"${ZCMP}" -N in 'name with spaces.lz' || test_failed $LINENO +rm -f 'name with spaces.lz' || framework_failure + +mkdir tmp2 +mkdir tmp2/tmp3 +cat in.bz2 > tmp2/tmp3/a.bz2 || framework_failure +cat in.gz > tmp2/tmp3/a.gz || framework_failure +"${ZUPDATE}" -N -r --format=gz tmp2 || test_failed $LINENO +[ -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ ! -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e tmp2/tmp3/a.lz ] || test_failed $LINENO +rm -f tmp2/tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -r --format=bz2 tmp2 || test_failed $LINENO +[ ! -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ ! -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e tmp2/tmp3/a.lz ] || test_failed $LINENO +rm -f tmp2/tmp3/a.lz || framework_failure +cat in.bz2 > tmp2/tmp3/a.bz2 || framework_failure +cat in.gz > tmp2/tmp3/a.gz || framework_failure +cd tmp2 || framework_failure +"${ZUPDATE}" -N -r -k -f . || test_failed $LINENO +[ -e tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +rm -f tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -r -k -f || test_failed $LINENO +[ -e tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +rm -f tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -R -k -f . || test_failed $LINENO +[ -e tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +rm -f tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -R -k -f || test_failed $LINENO +[ -e tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +rm -f tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -r -f . || test_failed $LINENO +[ ! -e tmp3/a.bz2 ] || test_failed $LINENO +[ ! -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +cd .. 
|| framework_failure +rm -r tmp2 || framework_failure + +rm -f empty empty.bz2 empty.gz empty.lz || framework_failure +if ln -s '.' slink 2> /dev/null ; then + "${ZCAT}" -N -r slink > /dev/null || test_failed $LINENO + "${ZGREP}" -N -r "GNU" slink > /dev/null || test_failed $LINENO + "${ZTEST}" -N -r slink || test_failed $LINENO + "${ZUPDATE}" -N -r -f slink || test_failed $LINENO +else + printf "\nwarning: skipping link test: 'ln' does not work on your system." +fi +rm -f slink || framework_failure + +echo +if [ ${fail} = 0 ] ; then + echo "tests completed successfully." + cd "${objdir}" && rm -r tmp +else + echo "tests failed." +fi +exit ${fail} diff --git a/testsuite/test.txt b/testsuite/test.txt new file mode 100644 index 0000000..9196a3a --- /dev/null +++ b/testsuite/test.txt @@ -0,0 +1,676 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) <year> <name of author> + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License.
+ GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/testsuite/test.txt.tar b/testsuite/test.txt.tar new file mode 100644 index 0000000..92d6f1b Binary files /dev/null and b/testsuite/test.txt.tar differ diff --git a/testsuite/test_bad_crc.lz b/testsuite/test_bad_crc.lz new file mode 100644 index 0000000..c7d5bc9 Binary files /dev/null and b/testsuite/test_bad_crc.lz differ diff --git a/testsuite/zcat_vs.dat b/testsuite/zcat_vs.dat new file mode 100644 index 0000000..29978fd --- /dev/null +++ b/testsuite/zcat_vs.dat @@ -0,0 +1,68 @@ +Worst case test file for zcat -vs. +First 4096 input bytes produce 4095 output bytes because of -s. +Next 4096 input bytes produce 16384 output bytes, accumulating a total +of 20479 bytes in the output buffer. +---------------------------------------------- + + +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... 
+............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... 
+............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +............................................................... +€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€
€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€
€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€ \ No newline at end of file diff --git a/testsuite/zero_bad_crc.gz b/testsuite/zero_bad_crc.gz new file mode 100644 index 0000000..a2a9991 Binary files /dev/null and b/testsuite/zero_bad_crc.gz differ diff --git a/testsuite/zero_bad_crc.lz b/testsuite/zero_bad_crc.lz new file mode 100644 index 0000000..0d3cc93 Binary files /dev/null and b/testsuite/zero_bad_crc.lz differ diff --git a/zcat.cc b/zcat.cc new file mode 100644 index 0000000..62b93aa --- /dev/null +++ b/zcat.cc @@ -0,0 +1,386 @@ +/* Zcat - decompress and concatenate files to standard output + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__MSVCRT__) || defined(__OS2__) +#include +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + + +namespace { + +#include "recursive.cc" +#include "zcatgrep.cc" + +struct Cat_options + { + int number_lines; // 0 = no, 1 = nonblank, 2 = all + bool show_ends; + bool show_nonprinting; + bool show_tabs; + bool squeeze_blank; + + Cat_options() + : number_lines( 0 ), show_ends( false ), show_nonprinting( false ), + show_tabs( false ), squeeze_blank( false ) {} + }; + + +class Line_number // unlimited size line counter + { + std::string str; + unsigned first_digit_pos; + +public: + Line_number() : str( " 0\t" ), first_digit_pos( 5 ) {} + + void next() + { + for( unsigned i = str.size() - 1; i > first_digit_pos; ) + { + if( str[--i] < '9' ) { ++str[i]; return; } + str[i] = '0'; + } + if( first_digit_pos > 0 ) str[--first_digit_pos] = '1'; + else str.insert( str.begin() + first_digit_pos, '1' ); + } + + int sprint( uint8_t * const buf ) + { + std::memcpy( buf, str.c_str(), str.size() ); + return str.size(); + } + }; + +Line_number line_number; + + +void show_help() + { + std::printf( "zcat copies each file argument to standard output in sequence. If any\n" + "file given is compressed, its decompressed content is copied. If a file\n" + "given does not exist, and its name does not end with one of the known\n" + "extensions, zcat tries the compressed file names corresponding to the\n" + "formats supported. If a file fails to decompress, zcat continues copying the\n" + "rest of the files.\n" + "\nIf a file is specified as '-', data are read from standard input,\n" + "decompressed if needed, and sent to standard output. 
Data read from\n" + "standard input must be of the same type; all uncompressed or all in the\n" + "same compressed format.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches read standard input.\n" + "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nUsage: zcat [options] [files]\n" + "\nExit status is 0 if no errors occurred, 1 otherwise.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -A, --show-all equivalent to '-vET'\n" + " -b, --number-nonblank number nonblank output lines\n" + " -e equivalent to '-vE'\n" + " -E, --show-ends display '$' at end of each line\n" + " -M, --format= process only the formats in \n" + " -n, --number number all output lines\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format= force the format given (bz2, gz, lz, xz)\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -s, --squeeze-blank never more than one single blank line\n" + " -t equivalent to '-vT'\n" + " -T, --show-tabs display TAB characters as '^I'\n" + " -v, --show-nonprinting use '^' and 'M-' notation, except for LF and TAB\n" + " --verbose verbose mode (show error messages)\n" + " --bz2= set compressor and options for bzip2 format\n" + " --gz= set compressor and options for gzip format\n" + " --lz= set compressor and options for lzip format\n" + " --xz= set compressor and options for xz format\n" ); + show_help_addr(); + } + + +bool do_cat( const int infd, const int buffer_size, + uint8_t * const inbuf, uint8_t * const outbuf, + const std::string & input_filename, + const Cat_options & cat_options ) + { + static int at_bol = 1; // at begin of line. 0 = false, 1 = true, + // 2 = at begin of second blank line. 
+ int inpos = 0; // positions in buffers + int outpos = 0; + int rd = -1; // bytes read by the last readblock + unsigned char c; + + while( true ) + { + do { + if( outpos >= buffer_size ) + { + if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos ) + { show_error( "Write error", errno ); return false; } + outpos = 0; + } + if( inpos > rd ) // inbuf is empty + { + rd = readblock( infd, inbuf, buffer_size ); + if( rd != buffer_size && errno ) + { + show_file_error( input_filename.c_str(), "Read error", errno ); + return false; + } + if( rd == 0 ) + { + if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos ) + { show_error( "Write error", errno ); return false; } + outpos = 0; + return true; + } + inpos = 0; + inbuf[rd] = '\n'; // sentinel newline + } + else // a real newline was found + { + if( at_bol > 1 ) + { + if( cat_options.squeeze_blank ) { c = inbuf[inpos++]; continue; } + } + else ++at_bol; + if( at_bol > 1 && cat_options.number_lines == 2 ) + { + line_number.next(); + outpos += line_number.sprint( &outbuf[outpos] ); + } + if( cat_options.show_ends ) outbuf[outpos++] = '$'; + outbuf[outpos++] = '\n'; // output the newline + } + c = inbuf[inpos++]; + } + while( c == '\n' ); + + if( at_bol > 0 && cat_options.number_lines ) + { + line_number.next(); + outpos += line_number.sprint( &outbuf[outpos] ); + } + at_bol = 0; + + // the loops below continue until a newline (real or sentinel) is found + + if( cat_options.show_nonprinting ) + while( true ) + { + if( c < 32 || c >= 127 ) + { + if( c == '\n' ) break; + if( c != '\t' || cat_options.show_tabs ) + { + if( c >= 128 ) + { c -= 128; outbuf[outpos++] = 'M'; outbuf[outpos++] = '-'; } + if( c < 32 ) { c += 64; outbuf[outpos++] = '^'; } + else if( c == 127 ) { c = '?'; outbuf[outpos++] = '^'; } + } + } + outbuf[outpos++] = c; + c = inbuf[inpos++]; + } + else // not quoting + while( c != '\n' ) + { + if( c == '\t' && cat_options.show_tabs ) + { c += 64; outbuf[outpos++] = '^'; } + outbuf[outpos++] = c; + c = 
inbuf[inpos++]; + } + } + } + + +bool cat( int infd, const int format_index, const std::string & input_filename, + const Cat_options & cat_options ) + { + enum { buffer_size = 4096, outbuf_size = (5 * buffer_size) + 256 + 1 }; + // input buffer with space for sentinel newline at the end + uint8_t * const inbuf = new uint8_t[buffer_size+1]; + // output buffer with space for character quoting, 255-digit line number, + // worst case flushing respect to inbuf, and a canary byte. + uint8_t * const outbuf = new uint8_t[outbuf_size]; + outbuf[outbuf_size-1] = 0; + Children children; + bool error = false; + + if( !set_data_feeder( input_filename, &infd, children, format_index ) || + !do_cat( infd, buffer_size, inbuf, outbuf, input_filename, cat_options ) ) + error = true; + if( !good_status( children, !error ) ) error = true; + if( !error && close( infd ) != 0 ) { show_close_error(); error = true; } + if( outbuf[outbuf_size-1] != 0 ) internal_error( "buffer overflow." ); + delete[] outbuf; delete[] inbuf; + return !error; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { verbose_opt = 256, bz2_opt, gz_opt, lz_opt, xz_opt }; + int format_index = -1; + int recursive = 0; // 1 = '-r', 2 = '-R' + std::list< std::string > filenames; + Cat_options cat_options; + program_name = "zcat"; + invocation_name = ( argc > 0 ) ? 
argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'A', "show-all", Arg_parser::no }, // cat + { 'b', "number-nonblank", Arg_parser::no }, // cat + { 'c', "stdout", Arg_parser::no }, // gzip + { 'd', "decompress", Arg_parser::no }, // gzip + { 'e', 0, Arg_parser::no }, // cat + { 'E', "show-ends", Arg_parser::no }, // cat + { 'f', "force", Arg_parser::no }, // gzip + { 'h', "help", Arg_parser::no }, + { 'l', "list", Arg_parser::no }, // gzip + { 'L', "license", Arg_parser::no }, // gzip + { 'M', "format", Arg_parser::yes }, + { 'n', "number", Arg_parser::no }, // cat + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 's', "squeeze-blank", Arg_parser::no }, // cat + { 't', 0, Arg_parser::no }, // cat + { 'T', "show-tabs", Arg_parser::no }, // cat + { 'v', "show-nonprinting", Arg_parser::no }, // cat + { 'V', "version", Arg_parser::no }, + { verbose_opt, "verbose", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'A': cat_options.show_ends = true; + cat_options.show_nonprinting = true; + cat_options.show_tabs = true; break; + case 'b': cat_options.number_lines = 1; break; + case 'c': break; + case 'd': break; + case 'e': cat_options.show_nonprinting = true; // fall through + case 'E': cat_options.show_ends 
= true; break; + case 'f': break; + case 'h': show_help(); return 0; + case 'l': break; + case 'L': break; + case 'M': parse_format_list( arg ); break; + case 'n': if( cat_options.number_lines == 0 ) + { cat_options.number_lines = 2; } break; + case 'N': break; + case 'O': format_index = parse_format_type( arg ); break; + case 'q': verbosity = -1; break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 's': cat_options.squeeze_blank = true; break; + case 't': cat_options.show_nonprinting = true; // fall through + case 'T': cat_options.show_tabs = true; break; + case 'v': cat_options.show_nonprinting = true; break; + case 'V': show_version(); return 0; + case verbose_opt: if( verbosity < 4 ) ++verbosity; break; + case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break; + default : internal_error( "uncaught option." ); + } + } // end process options + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" ); + + std::string input_filename; + bool error = false; + bool stdin_used = false; + while( next_filename( filenames, input_filename, error, recursive ) ) + { + int infd; + if( input_filename == "." 
) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; input_filename = "-"; + } + else + { + infd = open_instream( input_filename, format_index < 0 ); + if( infd < 0 ) { error = true; continue; } + } + + if( !cat( infd, format_index, input_filename, cat_options ) ) error = true; + + if( close( infd ) != 0 ) + { show_file_error( input_filename.c_str(), "Error closing input file", + errno ); error = true; } + } + + if( std::fclose( stdout ) != 0 ) + { + show_error( "Error closing stdout", errno ); + error = true; + } + return error; + } diff --git a/zcatgrep.cc b/zcatgrep.cc new file mode 100644 index 0000000..31d54e6 --- /dev/null +++ b/zcatgrep.cc @@ -0,0 +1,59 @@ +/* Common code for zcat and zgrep + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +/* Return the index in simple_extensions of the first extension that name ends with (name must be longer than the extension), or -1 if there is none. */ +int simple_extension_index( const std::string & name ) + { + for( int i = 0; i < num_formats; ++i ) + { + const std::string ext( simple_extensions[i] ); + if( name.size() > ext.size() && + name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 ) + return i; + } + return -1; + } + +/* Open input_filename read-only. If that fails, search is true and the name has no known extension, try the name plus the extension of each enabled format in format_order, updating input_filename on success. Returns the descriptor, or a negative value on failure (reported unless no_messages is set). */ +int open_instream( std::string & input_filename, const bool search, + const bool no_messages = false ) + { + int infd = open( input_filename.c_str(), O_RDONLY | O_BINARY ); + if( infd < 0 ) + { + const int saved_errno = errno; /* errno of the first open; this is the one reported below */ + if( search && simple_extension_index( input_filename ) < 0 ) + { + for( int i = 0; i < num_formats; ++i ) + if( enabled_format( format_order[i] ) ) + { + const std::string name( input_filename + + simple_extensions[format_order[i]] ); + infd = open( name.c_str(), O_RDONLY | O_BINARY ); + if( infd >= 0 ) { input_filename = name; break; } + } + } + if( infd < 0 && !no_messages ) + show_file_error( input_filename.c_str(), "Can't open input file", + saved_errno ); + } + return infd; + } diff --git a/zcmp.cc b/zcmp.cc new file mode 100644 index 0000000..9daa1b9 --- /dev/null +++ b/zcmp.cc @@ -0,0 +1,471 @@ +/* Zcmp - decompress and compare two files byte by byte + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__MSVCRT__) || defined(__OS2__) +#include +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + +#ifndef LLONG_MAX +#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL +#endif + + +namespace { + +#include "zcmpdiff.cc" + +void show_help() + { + std::printf( "zcmp compares two files and, if they differ, writes to standard output the\n" + "first byte and line number where they differ. Bytes and lines are numbered\n" + "starting with 1. A hyphen '-' used as a file argument means standard input.\n" + "If any file given is compressed, its decompressed content is used. Compressed\n" + "files are decompressed on the fly; no temporary files are created.\n" + "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nUsage: zcmp [options] file1 [file2]\n" + "\nzcmp compares file1 to file2. The standard input is used only if file1 or\n" + "file2 refers to standard input. 
If file2 is omitted zcmp tries the\n" + "following:\n" + "\n - If file1 is compressed, compares its decompressed contents with\n" + " the corresponding uncompressed file (the name of file1 with the\n" + " extension removed).\n" + "\n - If file1 is uncompressed, compares it with the decompressed\n" + " contents of file1.[lz|bz2|gz|xz] (the first one that is found).\n" + "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -b, --print-bytes print differing bytes\n" + " -i, --ignore-initial=[:] ignore differences in the first bytes\n" + " -l, --list list position, value of all differing bytes\n" + " -M, --format= process only the formats in \n" + " -n, --bytes= compare at most bytes\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=[][,] force the formats given (bz2, gz, lz, xz)\n" + " -q, --quiet suppress all messages\n" + " -s, --silent (same as --quiet)\n" + " -v, --verbose verbose mode (same as --list)\n" + " --bz2= set compressor and options for bzip2 format\n" + " --gz= set compressor and options for gzip format\n" + " --lz= set compressor and options for lzip format\n" + " --xz= set compressor and options for xz format\n" + "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); + show_help_addr(); + } + + +long long getnum( const char * const ptr, const char ** const tailp = 0, + const long long llimit = 0, + const long long ulimit = LLONG_MAX ) + { + char * tail; + errno = 0; + long long result = strtoll( ptr, &tail, 0 ); + if( tail == ptr ) + { + show_error( "Bad or missing numerical argument.", 0, true ); + std::exit( 2 ); + } + if( result < 0 ) errno = ERANGE; + + if( !errno && tail[0] && std::isalpha( tail[0] ) ) + { + const unsigned char ch = *tail++; + int factor; + bool bsuf; 
// 'B' suffix is present + if( tail[0] == 'i' ) { ++tail; factor = 1024; } else factor = 1000; + if( tail[0] == 'B' ) { ++tail; bsuf = true; } else bsuf = false; + int exponent = -1; // -1 = bad multiplier + switch( ch ) + { + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; + case 'B': if( factor == 1000 && !bsuf ) exponent = 0; break; + } + if( exponent < 0 ) + { + show_error( "Bad multiplier in numerical argument.", 0, true ); + std::exit( 2 ); + } + for( int i = 0; i < exponent; ++i ) + { + if( ulimit / factor >= result ) result *= factor; + else { errno = ERANGE; break; } + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + show_error( "Numerical argument out of limits." 
); + std::exit( 2 ); + } + if( tailp ) *tailp = tail; + return result; + } + +/* Parse the argument of --ignore-initial: a single number sets both counts, two numbers separated by a colon or a comma set one count per file. Exits with status 2 on any other separator. */ +void parse_ignore_initial( const char * const arg, long long ignore_initial[2] ) + { + const char * tail; + ignore_initial[0] = getnum( arg, &tail ); + if( *tail == ':' || *tail == ',' ) + ignore_initial[1] = getnum( ++tail ); + else if( *tail == 0 ) ignore_initial[1] = ignore_initial[0]; + else + { + show_error( "Bad separator in argument of '--ignore-initial'", 0, true ); + std::exit( 2 ); + } + } + +/* Read and discard up to ignore_initial bytes from infd, at most 4096 bytes per read. Returns false if a short read leaves errno set; a short read with errno clear (presumably end of file) just ends the skipping and returns true. */ +bool skip_ignore_initial( const long long ignore_initial, const int infd ) + { + if( ignore_initial > 0 ) + { + enum { buffer_size = 4096 }; + long long rest = ignore_initial; + uint8_t buffer[buffer_size]; + while( rest > 0 ) + { + const int size = std::min( rest, (long long)buffer_size ); + const int rd = readblock( infd, buffer, size ); + if( rd != size && errno ) return false; + if( rd < size ) break; + rest -= rd; + } + } + return true; + } + + +// Put into buf the unsigned char c, making unprintable bytes +// visible by quoting like cat -t does. 
+void sprintc( char * const buf, unsigned char c ) + { + int i = 0; + /* writes at most 5 bytes into buf: M-, ^, one character and the terminating 0 */ + if( c < 32 || c >= 127 ) + { + if( c >= 128 ) { c -= 128; buf[i++] = 'M'; buf[i++] = '-'; } + if( c < 32 ) { c += 64; buf[i++] = '^'; } + else if( c == 127 ) { c = '?'; buf[i++] = '^'; } + } + buf[i++] = c; + buf[i++] = 0; + } + +/* Return the number of leading bytes that are equal in buffer0 and buffer1. The scan has no length limit: it relies on the differing sentinel bytes that cmp stores after the data. When verbosity is 0 the newlines found among the equal bytes are added to *line_numberp. */ +int block_compare( const uint8_t * const buffer0, + const uint8_t * const buffer1, + unsigned long long * const line_numberp ) + { + const uint8_t * p0 = buffer0; + const uint8_t * p1 = buffer1; + + if( verbosity == 0 ) + { + int nl_count = 0; + while( *p0 == *p1 ) + { if( *p0 == '\n' ) { ++nl_count; } ++p0; ++p1; } + *line_numberp += nl_count; + } + else while( *p0 == *p1 ) { ++p0; ++p1; } + return p0 - buffer0; + } + +/* Compare the data read from infd[0] and infd[1] in blocks of up to 4096 bytes; max_size < 0 means no limit on the number of bytes compared. */ +int cmp( const long long max_size, const int infd[2], + const std::string filenames[2], const bool print_bytes ) + { + const int buffer_size = 4096; + unsigned long long byte_number = 1; + unsigned long long line_number = 1; + // remaining number of bytes to compare + long long rest = ( max_size >= 0 ) ? 
max_size : buffer_size; + // buffers with space for sentinels at the end + uint8_t * const buffer0 = new uint8_t[2*(buffer_size+1)]; + uint8_t * const buffer1 = buffer0 + buffer_size + 1; + uint8_t * buffer[2]; + buffer[0] = buffer0; buffer[1] = buffer1; + int different = 0; + + while( rest > 0 ) + { + const int size = std::min( (long long)buffer_size, rest ); + if( max_size >= 0 ) rest -= size; + int rd[2]; // number of bytes read from each file + for( int i = 0; i < 2; ++i ) + { + rd[i] = readblock( infd[i], buffer[i], size ); + if( rd[i] != size && errno ) + { + show_file_error( filenames[i].c_str(), "Read error", errno ); + return 2; + } + } + + const int min_rd = std::min( rd[0], rd[1] ); + buffer0[min_rd] = 0; // sentinels for the block compare + buffer1[min_rd] = 1; + + int first_diff = block_compare( buffer0, buffer1, &line_number ); + byte_number += first_diff; + + if( first_diff < min_rd ) + { + if( verbosity < 0 ) return 1; // return status only + if( verbosity == 0 ) // show first difference + { + if( !print_bytes ) + std::printf( "%s %s differ: byte %llu, line %llu\n", + filenames[0].c_str(), filenames[1].c_str(), + byte_number, line_number ); + else + { + const unsigned char c0 = buffer0[first_diff]; + const unsigned char c1 = buffer1[first_diff]; + char buf0[5], buf1[5]; + sprintc( buf0, c0 ); sprintc( buf1, c1 ); + std::printf( "%s %s differ: byte %llu, line %llu is %3o %s %3o %s\n", + filenames[0].c_str(), filenames[1].c_str(), + byte_number, line_number, c0, buf0, c1, buf1 ); + } + std::fflush( stdout ); + return 1; + } + else // verbosity > 0 ; show all differences + { + different = 1; + for( ; first_diff < min_rd; ++byte_number, ++first_diff ) + { + const unsigned char c0 = buffer0[first_diff]; + const unsigned char c1 = buffer1[first_diff]; + if( c0 != c1 ) + { + if( !print_bytes ) + std::printf( "%llu %3o %3o\n", byte_number, c0, c1 ); + else + { + char buf0[5], buf1[5]; + sprintc( buf0, c0 ); sprintc( buf1, c1 ); + std::printf( "%llu %3o %-4s 
%3o %s\n", + byte_number, c0, buf0, c1, buf1 ); + } + } + } + std::fflush( stdout ); + } + } + + if( rd[0] != rd[1] ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: EOF on %s\n", + program_name, filenames[rd[1] 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'b', "print-bytes", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'i', "ignore-initial", Arg_parser::yes }, + { 'l', "list", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'n', "bytes", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 's', "silent", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'b': print_bytes = true; break; + case 'h': show_help(); return 0; + case 'i': parse_ignore_initial( arg.c_str(), ignore_initial ); break; + case 'l': verbosity = 1; break; + case 'M': parse_format_list( arg ); break; + case 'n': max_size = getnum( arg.c_str() ); break; + case 'N': break; + case 'O': parse_format_types2( arg, format_types ); break; + case 'q': + case 's': verbosity = -1; break; + case 'v': verbosity = 1; break; + case 'V': show_version(); return 0; + case bz2_opt: parse_compressor( arg, fmt_bz2 ); break; + case gz_opt: parse_compressor( arg, fmt_gz ); break; + case lz_opt: parse_compressor( arg, 
fmt_lz ); break; + case xz_opt: parse_compressor( arg, fmt_xz ); break; + default : internal_error( "uncaught option." ); + } + } // end process options + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + if( argind >= parser.arguments() ) + { show_error( "No files given.", 0, true ); return 2; } + if( argind + 2 < parser.arguments() ) + { show_error( "Too many files.", 0, true ); return 2; } + + const int files = parser.arguments() - argind; + std::string filenames[2]; // file names of the two input files + filenames[0] = parser.argument( argind ); + if( files == 2 ) filenames[1] = parser.argument( argind + 1 ); + + int infd[2]; // file descriptors of the two files + infd[0] = ( filenames[0] == "-" ) ? + STDIN_FILENO : open_instream( filenames[0] ); + if( infd[0] < 0 ) return 2; + + if( files == 2 ) + { + if( check_identical( filenames[0].c_str(), filenames[1].c_str() ) ) + { + if( ignore_initial[0] == ignore_initial[1] ) return 0; + else { show_error( "Can't compare parts of same file." ); return 2; } + } + infd[1] = ( filenames[1] == "-" ) ? 
+ STDIN_FILENO : open_instream( filenames[1] ); + if( infd[1] < 0 ) return 2; + } + else + { + if( filenames[0] == "-" ) + { show_error( "Missing operand after '-'.", 0, true ); return 2; } + if( format_types[0] >= 0 || format_types[1] >= 0 ) + { show_error( "Two files must be given when format is specified.", 0, true ); + return 2; } + filenames[1] = filenames[0]; + infd[1] = open_other_instream( filenames[1] ); + if( infd[1] < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't find file to compare with '%s'.\n", + program_name, filenames[0].c_str() ); + show_error( 0, 0, true ); return 2; + } + } + + int old_infd[2]; // copy of file descriptors of the two files + old_infd[0] = infd[0]; old_infd[1] = infd[1]; + Children children[2]; + if( !set_data_feeder( filenames[0], &infd[0], children[0], format_types[0] ) || + !set_data_feeder( filenames[1], &infd[1], children[1], format_types[1] ) ) + return 2; + + for( int i = 0; i < 2; ++i ) + if( !skip_ignore_initial( ignore_initial[i], infd[i] ) ) + { + show_file_error( filenames[i].c_str(), + "Read error skipping initial bytes", errno ); + return 2; + } + + int retval = cmp( max_size, infd, filenames, print_bytes ); + + for( int i = 0; i < 2; ++i ) + if( !good_status( children[i], retval == 0 && max_size < 0 ) ) retval = 2; + + for( int i = 0; i < 2; ++i ) + { + if( close( infd[i] ) != 0 ) + { show_close_error(); retval = 2; } + if( filenames[i] != "-" && close( old_infd[i] ) != 0 ) + { + show_file_error( filenames[i].c_str(), "Error closing input file", errno ); + retval = 2; + } + } + if( std::fclose( stdout ) != 0 ) + { + show_error( "Error closing stdout", errno ); + retval = 2; + } + + return retval; + } diff --git a/zcmpdiff.cc b/zcmpdiff.cc new file mode 100644 index 0000000..fceb8cf --- /dev/null +++ b/zcmpdiff.cc @@ -0,0 +1,70 @@ +/* Common code for zcmp and zdiff + Copyright (C) 2010-2021 Antonio Diaz Diaz. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +/* Open input_filename read-only and return the descriptor; on failure report the error with show_file_error and return the negative value given by open. */ +int open_instream( const std::string & input_filename ) + { + const int infd = open( input_filename.c_str(), O_RDONLY | O_BINARY ); + if( infd < 0 ) + show_file_error( input_filename.c_str(), "Can't open input file", errno ); + return infd; + } + +/* Open the counterpart of name. If name has a known extension and enabled_format( -1 ) holds, the extension is replaced using extension_from and extension_to and that file is opened. If name has no known extension, name plus the extension of each enabled format in format_order is tried until one opens. On success name holds the name of the file opened. Returns the descriptor, or a negative value if nothing could be opened; no message is printed here. */ +int open_other_instream( std::string & name ) + { + const int eindex = extension_index( name ); + if( eindex >= 0 && enabled_format( -1 ) ) + { // open uncompressed version + name.resize( name.size() - std::strlen( extension_from( eindex ) ) ); + name += extension_to( eindex ); + return open( name.c_str(), O_RDONLY | O_BINARY ); + } + if( eindex < 0 ) // search compressed version + for( int i = 0; i < num_formats; ++i ) + if( enabled_format( format_order[i] ) ) + { + const std::string s( name + simple_extensions[format_order[i]] ); + const int infd = open( s.c_str(), O_RDONLY | O_BINARY ); + if( infd >= 0 ) { name = s; return infd; } + } + return -1; + } + +/* Parse an argument of the form [f1][,f2] into format_types[0] and format_types[1]: each format given is converted with parse_format_type, an omitted one is stored as -1. */ +void parse_format_types2( const std::string & arg, int format_types[2] ) + { + const unsigned i = std::min( arg.find( ',' ), arg.size() ); + if( i > 0 ) format_types[0] = parse_format_type( arg.substr( 0, i ) ); + else format_types[0] = -1; + if( i + 1 < arg.size() ) format_types[1] = + parse_format_type( arg.substr( i + 1 ) ); + else format_types[1] = -1; + } + +/* Return true if the two names are equal as strings, or if stat succeeds on both and reports the same inode and device. */ +bool check_identical( const char * const name1, 
const char * const name2 ) + { + if( std::strcmp( name1, name2 ) == 0 ) return true; + struct stat stat1, stat2; + if( stat( name1, &stat1 ) || stat( name2, &stat2 ) ) return false; + return ( stat1.st_ino == stat2.st_ino && stat1.st_dev == stat2.st_dev ); + } diff --git a/zdiff.cc b/zdiff.cc new file mode 100644 index 0000000..a173971 --- /dev/null +++ b/zdiff.cc @@ -0,0 +1,440 @@ +/* Zdiff - decompress and compare two files line by line + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__MSVCRT__) || defined(__OS2__) +#include +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + +// 'verbosity' is always 0 in zdiff; no --verbose or --quiet available. + +namespace { + +std::string fifonames[2]; // names of the two fifos passed to diff + +#include "zcmpdiff.cc" + +void show_help() + { + std::printf( "zdiff compares two files and, if they differ, writes to standard output the\n" + "differences line by line. A hyphen '-' used as a file argument means standard\n" + "input. 
If any file given is compressed, its decompressed content is used.\n" + "zdiff is a front end to the program diff and has the limitation that messages\n" + "from diff refer to temporary file names instead of those specified.\n" + "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nUsage: zdiff [options] file1 [file2]\n" + "\nzdiff compares file1 to file2. The standard input is used only if file1 or\n" + "file2 refers to standard input. If file2 is omitted zdiff tries the\n" + "following:\n" + "\n - If file1 is compressed, compares its decompressed contents with\n" + " the corresponding uncompressed file (the name of file1 with the\n" + " extension removed).\n" + "\n - If file1 is uncompressed, compares it with the decompressed\n" + " contents of file1.[lz|bz2|gz|xz] (the first one that is found).\n" + "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" + "Some options only work if the diff program used supports them.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --text treat all files as text\n" + " -b, --ignore-space-change ignore changes in the amount of white space\n" + " -B, --ignore-blank-lines ignore changes whose lines are all blank\n" + " -c use the context output format\n" + " -C, --context= same as -c but use lines of context\n" + " -d, --minimal try hard to find a smaller set of changes\n" + " -E, --ignore-tab-expansion ignore changes due to tab expansion\n" + " -i, --ignore-case ignore case differences in file contents\n" + " -M, --format= process only the formats in \n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=[][,] force the formats given (bz2, gz, lz, xz)\n" + " -p, --show-c-function show which C function each change is in\n" + " -q, --brief output only whether files differ\n" + " -s, --report-identical-files report when two files are identical\n" + " -t, --expand-tabs expand tabs to 
spaces in output\n" + " -T, --initial-tab make tabs line up by prepending a tab\n" + " -u use the unified output format\n" + " -U, --unified= same as -u but use lines of context\n" + " -w, --ignore-all-space ignore all white space\n" + " -W, --width= output at most print columns\n" + " -y, --side-by-side output in two columns\n" + " --bz2= set compressor and options for bzip2 format\n" + " --gz= set compressor and options for gzip format\n" + " --lz= set compressor and options for lzip format\n" + " --xz= set compressor and options for xz format\n" ); + show_help_addr(); + } + + +const char * my_basename( const char * filename ) + { + const char * c = filename; + while( *c ) { if( *c == '/' ) { filename = c + 1; } ++c; } + return filename; + } + + +extern "C" void remove_fifos() + { + if( fifonames[0].size() ) + { std::remove( fifonames[0].c_str() ); fifonames[0].clear(); } + if( fifonames[1].size() ) + { std::remove( fifonames[1].c_str() ); fifonames[1].clear(); } + } + + +/* Set fifonames[i] to "${TMPDIR}/[_-]" + and create FIFOs. The pid is coded in little endian order. 
+*/ +bool set_fifonames( const std::string filenames[2] ) + { + enum { num_codes = 36 }; + const char * const codes = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char * p = std::getenv( "TMPDIR" ); + + if( p ) { fifonames[0] = p; fifonames[0] += '/'; } + else fifonames[0] = "/tmp/"; + int n = getpid(); + do fifonames[0] += codes[n % num_codes]; while( n /= num_codes ); /* pid in base 36, least significant digit first */ + const unsigned pos = fifonames[0].size(); + fifonames[0] += '_'; + fifonames[1] = fifonames[0]; + fifonames[0] += my_basename( filenames[0].c_str() ); + fifonames[1] += my_basename( filenames[1].c_str() ); + if( fifonames[1] == fifonames[0] ) fifonames[1][pos] = '-'; /* same basename: make the second name differ in the separator */ + + for( int i = 0; i < 2; ++i ) + if( mkfifo( fifonames[i].c_str(), S_IRUSR | S_IWUSR ) != 0 ) + { + if( errno == EEXIST ) + { + std::remove( fifonames[i].c_str() ); /* name already taken: remove it and retry once */ + if( mkfifo( fifonames[i].c_str(), S_IRUSR | S_IWUSR ) == 0 ) + continue; + } + show_file_error( fifonames[i].c_str(), "Can't create FIFO", errno ); + return false; + } + return true; + } + +/* Start the child process(es) that write the data read from infd into the FIFO fifoname: a feeder piped into a decompressor when get_compressor_name returns a compressor for the format, else a single feeder. The format is detected with test_format if format_index < 0. The child pids are stored in children.pid; returns false if a pipe or fork fails. */ +bool set_data_feeder( const std::string & filename, + const std::string & fifoname, const int infd, + Children & children, int format_index ) + { + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + if( format_index < 0 ) + format_index = test_format( infd, magic_data, &magic_size ); + children.compressor_name = get_compressor_name( format_index ); + + if( children.compressor_name ) // compressed + { + int fda[2]; // pipe from feeder to compressor + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const pid_t pid = fork(); + if( pid == 0 ) // child 1 (compressor feeder) + { + if( close( fda[0] ) != 0 || + !feed_data( filename, infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) + { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child 2 (compressor) + { + const int 
outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY ); + if( outfd < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't open FIFO '%s' for writing: %s\n", + program_name, fifoname.c_str(), std::strerror( errno ) ); + _exit( 2 ); + } + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + dup2( outfd, STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 && + close( outfd ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+3]; + argv[0] = children.compressor_name; + for( int i = 0; i < size; ++i ) + argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = ( verbosity >= 0 ) ? "-d" : "-dq"; + argv[size+2] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( children.compressor_name ); + _exit( 2 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( children.compressor_name ); return false; } + + close( fda[0] ); close( fda[1] ); + children.pid[0] = pid; + children.pid[1] = pid2; + } + else // uncompressed + { + const pid_t pid = fork(); + if( pid == 0 ) // child (feeder) + { + const int outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY ); + if( outfd < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't open FIFO '%s' for writing: %s\n", + program_name, fifoname.c_str(), std::strerror( errno ) ); + _exit( 2 ); + } + if( !feed_data( filename, infd, outfd, magic_data, magic_size ) ) + _exit( 2 ); + if( close( outfd ) != 0 ) + { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + children.pid[0] = pid; + children.pid[1] = 0; + } + return true; + } + +/* Signal handler: remove the FIFOs, restore the default action for sig and raise it again. NOTE(review): remove_fifos modifies std::string objects, which is not guaranteed to be async-signal-safe; confirm this is acceptable. */ +extern "C" void signal_handler( int sig ) + { + remove_fifos(); + std::signal( sig, SIG_DFL ); + std::raise( sig ); + } + +/* Install signal_handler for SIGHUP, SIGINT and SIGTERM. */ +void set_signals() + { + std::signal( SIGHUP, signal_handler ); + std::signal( SIGINT, signal_handler ); + std::signal( 
SIGTERM, signal_handler ); + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + std::vector< const char * > diff_args; // args to diff, maybe empty + int format_types[2] = { -1, -1 }; + program_name = "zdiff"; + invocation_name = ( argc > 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'a', "text", Arg_parser::no }, + { 'b', "ignore-space-change", Arg_parser::no }, + { 'B', "ignore-blank-lines", Arg_parser::no }, + { 'c', 0, Arg_parser::no }, + { 'C', "context", Arg_parser::yes }, + { 'd', "minimal", Arg_parser::no }, + { 'E', "ignore-tab-expansion", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'i', "ignore-case", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'p', "show-c-function", Arg_parser::no }, + { 'q', "brief", Arg_parser::no }, + { 's', "report-identical-files", Arg_parser::no }, + { 't', "expand-tabs", Arg_parser::no }, + { 'T', "initial-tab", Arg_parser::no }, + { 'u', 0, Arg_parser::no }, + { 'U', "unified", Arg_parser::yes }, + { 'V', "version", Arg_parser::no }, + { 'w', "ignore-all-space", Arg_parser::no }, + { 'W', "width", Arg_parser::yes }, + { 'y', "side-by-side", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'a': diff_args.push_back( "-a" ); 
break; + case 'b': diff_args.push_back( "-b" ); break; + case 'B': diff_args.push_back( "-B" ); break; + case 'c': diff_args.push_back( "-c" ); break; + case 'C': diff_args.push_back( "-C" ); + diff_args.push_back( arg.c_str() ); break; + case 'd': diff_args.push_back( "-d" ); break; + case 'E': diff_args.push_back( "-E" ); break; + case 'h': show_help(); return 0; + case 'i': diff_args.push_back( "-i" ); break; + case 'M': parse_format_list( arg ); break; + case 'N': break; + case 'O': parse_format_types2( arg, format_types ); break; + case 'p': diff_args.push_back( "-p" ); break; + case 'q': diff_args.push_back( "-q" ); break; + case 's': diff_args.push_back( "-s" ); break; + case 't': diff_args.push_back( "-t" ); break; + case 'T': diff_args.push_back( "-T" ); break; + case 'u': diff_args.push_back( "-u" ); break; + case 'U': diff_args.push_back( "-U" ); + diff_args.push_back( arg.c_str() ); break; + case 'V': show_version(); return 0; + case 'w': diff_args.push_back( "-w" ); break; + case 'W': diff_args.push_back( "-W" ); + diff_args.push_back( arg.c_str() ); break; + case 'y': diff_args.push_back( "-y" ); break; + case bz2_opt: parse_compressor( arg, fmt_bz2 ); break; + case gz_opt: parse_compressor( arg, fmt_gz ); break; + case lz_opt: parse_compressor( arg, fmt_lz ); break; + case xz_opt: parse_compressor( arg, fmt_xz ); break; + default : internal_error( "uncaught option." 
); + } + } // end process options + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + if( argind >= parser.arguments() ) + { show_error( "No files given.", 0, true ); return 2; } + if( argind + 2 < parser.arguments() ) + { show_error( "Too many files.", 0, true ); return 2; } + + const int files = parser.arguments() - argind; + std::string filenames[2]; // file names of the two input files + filenames[0] = parser.argument( argind ); + if( files == 2 ) filenames[1] = parser.argument( argind + 1 ); + + int infd[2]; // file descriptors of the two files + infd[0] = ( filenames[0] == "-" ) ? + STDIN_FILENO : open_instream( filenames[0] ); + if( infd[0] < 0 ) return 2; + + if( files == 2 ) + { + if( check_identical( filenames[0].c_str(), filenames[1].c_str() ) ) + return 0; + infd[1] = ( filenames[1] == "-" ) ? + STDIN_FILENO : open_instream( filenames[1] ); + if( infd[1] < 0 ) return 2; + } + else + { + if( filenames[0] == "-" ) + { show_error( "Missing operand after '-'.", 0, true ); return 2; } + if( format_types[0] >= 0 || format_types[1] >= 0 ) + { show_error( "Two files must be given when format is specified.", 0, true ); + return 2; } + filenames[1] = filenames[0]; + infd[1] = open_other_instream( filenames[1] ); + if( infd[1] < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't find file to compare with '%s'.\n", + program_name, filenames[0].c_str() ); + show_error( 0, 0, true ); return 2; + } + } + + std::atexit( remove_fifos ); + set_signals(); + if( !set_fifonames( filenames ) ) return 2; + + Children children[2]; + if( !set_data_feeder( filenames[0], fifonames[0], infd[0], children[0], + format_types[0] ) || + !set_data_feeder( filenames[1], fifonames[1], infd[1], children[1], + format_types[1] ) ) + return 2; + + const pid_t diff_pid = fork(); + if( diff_pid == 0 ) // child (diff) + { + const char ** const argv = new const char *[diff_args.size()+5]; + argv[0] = 
DIFF; + for( unsigned i = 0; i < diff_args.size(); ++i ) + argv[i+1] = diff_args[i]; + argv[diff_args.size()+1] = "--"; + argv[diff_args.size()+2] = fifonames[0].c_str(); + argv[diff_args.size()+3] = fifonames[1].c_str(); + argv[diff_args.size()+4] = 0; + execvp( argv[0], (char **)argv ); + show_exec_error( DIFF ); + _exit( 2 ); + } + if( diff_pid < 0 ) // parent + { show_fork_error( DIFF ); return 2; } + + int retval = wait_for_child( diff_pid, DIFF ); + + for( int i = 0; i < 2; ++i ) + if( !good_status( children[i], retval == 0 ) ) retval = 2; + + for( int i = 0; i < 2; ++i ) + if( filenames[i] != "-" && close( infd[i] ) != 0 ) + { + show_file_error( filenames[i].c_str(), "Error closing input file", errno ); + retval = 2; + } + + return retval; + } diff --git a/zegrep.in b/zegrep.in new file mode 100644 index 0000000..0cac12e --- /dev/null +++ b/zegrep.in @@ -0,0 +1,3 @@ +#! /bin/sh +bindir=`echo "$0" | sed -e 's,[^/]*$,,'` +exec "${bindir}"zgrep -E "$@" diff --git a/zfgrep.in b/zfgrep.in new file mode 100644 index 0000000..c1a96d8 --- /dev/null +++ b/zfgrep.in @@ -0,0 +1,3 @@ +#! /bin/sh +bindir=`echo "$0" | sed -e 's,[^/]*$,,'` +exec "${bindir}"zgrep -F "$@" diff --git a/zgrep.cc b/zgrep.cc new file mode 100644 index 0000000..1454e77 --- /dev/null +++ b/zgrep.cc @@ -0,0 +1,401 @@ +/* Zgrep - search compressed files for a regular expression + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__MSVCRT__) || defined(__OS2__) +#include +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + + +namespace { + +#include "recursive.cc" +#include "zcatgrep.cc" + +void show_help() + { + std::printf( "zgrep is a front end to the program grep that allows transparent search\n" + "on any combination of compressed and uncompressed files. If any file\n" + "given is compressed, its decompressed content is used. If a file given\n" + "does not exist, and its name does not end with one of the known\n" + "extensions, zgrep tries the compressed file names corresponding to the\n" + "formats supported. If a file fails to decompress, zgrep continues\n" + "searching the rest of the files.\n" + "\nIf a file is specified as '-', data are read from standard input,\n" + "decompressed if needed, and fed to grep. 
Data read from standard input\n" + "must be of the same type; all uncompressed or all in the same\n" + "compressed format.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches read standard input.\n" + "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nUsage: zgrep [options] [files]\n" + "\nExit status is 0 if match, 1 if no match, 2 if trouble.\n" + "Some options only work if the grep program used supports them.\n" + "\nOptions:\n" + " --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --text treat all files as text\n" + " -A, --after-context= print lines of trailing context\n" + " -b, --byte-offset print the byte offset of each line\n" + " -B, --before-context= print lines of leading context\n" + " -c, --count only print a count of matching lines per file\n" + " -C, --context= print lines of output context\n" + " --color[=] show matched strings in color\n" + " -e, --regexp= use as the pattern to match\n" + " -E, --extended-regexp is an extended regular expression\n" + " -f, --file= obtain patterns from \n" + " -F, --fixed-strings is a set of newline-separated strings\n" + " -h, --no-filename suppress the prefixing filename on output\n" + " -H, --with-filename print the filename for each match\n" + " -i, --ignore-case ignore case distinctions\n" + " -I ignore binary files\n" + " -l, --files-with-matches only print names of files containing matches\n" + " -L, --files-without-match only print names of files containing no matches\n" + " -m, --max-count= stop after matches\n" + " -M, --format= process only the formats in \n" + " -n, --line-number print the line number of each line\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -o, --only-matching show only the part of a line matching \n" + " -O, --force-format= force the format given (bz2, gz, lz, xz)\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive 
operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -s, --no-messages suppress error messages\n" + " -v, --invert-match select non-matching lines\n" + " --verbose verbose mode (show error messages)\n" + " -w, --word-regexp match only whole words\n" + " -x, --line-regexp match only whole lines\n" + " --bz2= set compressor and options for bzip2 format\n" + " --gz= set compressor and options for gzip format\n" + " --lz= set compressor and options for lzip format\n" + " --xz= set compressor and options for xz format\n" + "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); + show_help_addr(); + } + + +int zgrep_stdin( int infd, const int format_index, + const std::vector< const char * > & grep_args ) + { + Children children; + if( !set_data_feeder( "", &infd, children, format_index ) ) return 2; + const pid_t grep_pid = fork(); + if( grep_pid == 0 ) // child (grep) + { + if( dup2( infd, STDIN_FILENO ) >= 0 && close( infd ) == 0 ) + { + const char ** const argv = new const char *[grep_args.size()+2]; + argv[0] = GREP; + for( unsigned i = 0; i < grep_args.size(); ++i ) + argv[i+1] = grep_args[i]; + argv[grep_args.size()+1] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( GREP ); + _exit( 2 ); + } + if( grep_pid < 0 ) // parent + { show_fork_error( GREP ); return 2; } + + int retval = wait_for_child( grep_pid, GREP ); + + if( !good_status( children, retval == 1 ) ) retval = 2; + + if( close( infd ) != 0 ) + { show_close_error(); return 2; } + return retval; + } + + +int zgrep_file( int infd, const int format_index, + const std::string & input_filename, + const std::vector< const char * > & grep_args, + const int list_mode, const bool show_name ) + { + Children children; + if( !set_data_feeder( input_filename, &infd, children, format_index ) ) + return 2; + int fda[2]; // pipe from grep + if( pipe( fda ) 
< 0 ) + { show_error( "Can't create pipe", errno ); return 2; } + const pid_t grep_pid = fork(); + if( grep_pid == 0 ) // child (grep) + { + if( dup2( infd, STDIN_FILENO ) >= 0 && + dup2( fda[1], STDOUT_FILENO ) >= 0 && + close( infd ) == 0 && close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const char ** const argv = new const char *[grep_args.size()+2]; + argv[0] = GREP; + for( unsigned i = 0; i < grep_args.size(); ++i ) + argv[i+1] = grep_args[i]; + argv[grep_args.size()+1] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( GREP ); + _exit( 2 ); + } + if( grep_pid < 0 ) // parent + { show_fork_error( GREP ); return 2; } + + close( fda[1] ); + enum { buffer_size = 256 }; + uint8_t buffer[buffer_size]; + bool line_begin = true; + while( true ) + { + const int size = readblock( fda[0], buffer, buffer_size ); + if( size != buffer_size && errno ) + { show_error( "Read error", errno ); return 2; } + if( size > 0 && !list_mode ) + { + if( show_name ) + for( int i = 0; i < size; ++i ) + { + if( line_begin ) + { line_begin = false; std::printf( "%s:", input_filename.c_str() ); } + if( buffer[i] == '\n' ) line_begin = true; + putchar( buffer[i] ); + } + else if( std::fwrite( buffer, 1, size, stdout ) != (unsigned)size ) + { std::fflush( stdout ); show_error( "Write error", errno ); return 2; } + std::fflush( stdout ); + } + if( size < buffer_size ) break; // end of grep's output + } + + int retval = wait_for_child( grep_pid, GREP ); + + if( !good_status( children, retval == 1 ) ) retval = 2; + + if( list_mode && (retval == 0) == (list_mode == 1) ) + { std::printf( "%s\n", input_filename.c_str() ); std::fflush( stdout ); } + if( close( infd ) != 0 ) + { show_close_error(); return 2; } + if( close( fda[0] ) != 0 ) + { show_close_error( GREP ); return 2; } + return retval; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { help_opt = 256, verbose_opt, color_opt, + bz2_opt, gz_opt, lz_opt, xz_opt }; + int 
format_index = -1; + int list_mode = 0; // 1 = list matches, -1 = list non-matches + int recursive = 0; // 1 = '-r', 2 = '-R' + int show_name = -1; // tri-state bool + bool no_messages = false; + std::list< std::string > filenames; + std::vector< const char * > grep_args; // args to grep, maybe empty + std::string color_option; // needed because of optional arg + program_name = "zgrep"; + invocation_name = ( argc > 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'a', "text", Arg_parser::no }, // grep GNU + { 'A', "after-context", Arg_parser::yes }, // grep GNU + { 'b', "byte-offset", Arg_parser::no }, // grep GNU + { 'B', "before-context", Arg_parser::yes }, // grep GNU + { 'c', "count", Arg_parser::no }, // grep + { 'C', "context", Arg_parser::yes }, // grep GNU + { 'e', "regexp", Arg_parser::yes }, // grep + { 'E', "extended-regexp", Arg_parser::no }, // grep + { 'f', "file ", Arg_parser::yes }, // grep + { 'F', "fixed-strings", Arg_parser::no }, // grep + { 'h', "no-filename", Arg_parser::no }, // grep GNU + { 'H', "with-filename", Arg_parser::no }, // grep GNU + { 'i', "ignore-case", Arg_parser::no }, // grep + { 'I', 0, Arg_parser::no }, // grep GNU + { 'l', "files-with-matches", Arg_parser::no }, // grep + { 'L', "files-without-match", Arg_parser::no }, // grep GNU + { 'm', "max-count", Arg_parser::yes }, // grep GNU + { 'M', "format", Arg_parser::yes }, + { 'n', "line-number", Arg_parser::no }, // grep + { 'N', "no-rcfile", Arg_parser::no }, + { 'o', "only-matching", Arg_parser::no }, // grep + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 's', "no-messages", Arg_parser::no }, // grep + { 'v', "invert-match", Arg_parser::no }, // grep + { 'V', "version", Arg_parser::no }, + { 'w', "word-regexp", Arg_parser::no }, // grep GNU + { 'x', "line-regexp", Arg_parser::no }, // grep + { help_opt, "help", 
Arg_parser::no }, + { verbose_opt, "verbose", Arg_parser::no }, + { color_opt, "color", Arg_parser::maybe }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + bool pattern_found = false; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'a': grep_args.push_back( "-a" ); break; + case 'A': grep_args.push_back( "-A" ); + grep_args.push_back( arg.c_str() ); break; + case 'b': grep_args.push_back( "-b" ); break; + case 'B': grep_args.push_back( "-B" ); + grep_args.push_back( arg.c_str() ); break; + case 'c': grep_args.push_back( "-c" ); break; + case 'C': grep_args.push_back( "-C" ); + grep_args.push_back( arg.c_str() ); break; + case 'e': grep_args.push_back( "-e" ); + grep_args.push_back( arg.c_str() ); pattern_found = true; break; + case 'E': grep_args.push_back( "-E" ); break; + case 'f': grep_args.push_back( "-f" ); + grep_args.push_back( arg.c_str() ); pattern_found = true; break; + case 'F': grep_args.push_back( "-F" ); break; + case 'h': show_name = false; break; + case 'H': show_name = true; break; + case 'i': grep_args.push_back( "-i" ); break; + case 'I': grep_args.push_back( "-I" ); break; + case 'l': grep_args.push_back( "-l" ); list_mode = 1; break; + case 'L': grep_args.push_back( "-L" ); list_mode = -1; break; + case 'm': grep_args.push_back( "-m" ); + grep_args.push_back( arg.c_str() ); break; + case 'M': parse_format_list( arg ); break; + case 'n': grep_args.push_back( "-n" ); break; + case 'N': break; + case 'o': grep_args.push_back( "-o" 
); break; + case 'O': format_index = parse_format_type( arg ); break; + case 'q': grep_args.push_back( "-q" ); verbosity = -1; break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 's': grep_args.push_back( "-s" ); no_messages = true; break; + case 'v': grep_args.push_back( "-v" ); break; + case 'V': show_version(); return 0; + case 'w': grep_args.push_back( "-w" ); break; + case 'x': grep_args.push_back( "-x" ); break; + case help_opt : show_help(); return 0; + case verbose_opt: if( verbosity < 4 ) ++verbosity; + no_messages = false; break; + case color_opt: color_option = "--color"; + if( !arg.empty() ) { color_option += '='; color_option += arg; } + break; + case bz2_opt: parse_compressor( arg, fmt_bz2 ); break; + case gz_opt: parse_compressor( arg, fmt_gz ); break; + case lz_opt: parse_compressor( arg, fmt_lz ); break; + case xz_opt: parse_compressor( arg, fmt_xz ); break; + default : internal_error( "uncaught option." ); + } + } // end process options + + if( !color_option.empty() ) // push the last value set + grep_args.push_back( color_option.c_str() ); + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + if( !pattern_found ) + { + if( argind >= parser.arguments() ) + { show_error( "Pattern not found." ); return 2; } + const std::string & arg = parser.argument( argind++ ); + if( arg.size() && arg[0] == '-' ) grep_args.push_back( "-e" ); + grep_args.push_back( arg.c_str() ); + } + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() ) filenames.push_back( recursive ? "." 
: "-" ); + + if( show_name < 0 ) show_name = ( filenames.size() != 1 || recursive ); + + std::string input_filename; + int retval = 1; + bool error = false; + bool stdin_used = false; + while( next_filename( filenames, input_filename, error, recursive, + false, no_messages ) ) + { + int infd; + if( input_filename == "." ) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; input_filename = "-"; + } + else + { + infd = open_instream( input_filename, format_index < 0, no_messages ); + if( infd < 0 ) { error = true; continue; } + } + + int tmp; + if( infd == STDIN_FILENO ) + tmp = zgrep_stdin( infd, format_index, grep_args ); + else tmp = zgrep_file( infd, format_index, input_filename, grep_args, + list_mode, show_name ); + if( tmp == 0 || ( tmp == 2 && retval == 1 ) ) retval = tmp; + + if( close( infd ) != 0 ) + { show_file_error( input_filename.c_str(), "Error closing input file", + errno ); error = true; } + if( retval == 0 && verbosity < 0 ) break; + } + + if( std::fclose( stdout ) != 0 ) + { + show_error( "Error closing stdout", errno ); + error = true; + } + if( error && ( retval != 0 || verbosity >= 0 ) ) retval = 2; + return retval; + } diff --git a/ztest.cc b/ztest.cc new file mode 100644 index 0000000..812278e --- /dev/null +++ b/ztest.cc @@ -0,0 +1,335 @@ +/* Ztest - verify the integrity of compressed files + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__MSVCRT__) || defined(__OS2__) +#include +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +namespace { + +#include "recursive.cc" + +void show_help() + { + std::printf( "ztest verifies the integrity of the compressed files specified.\n" + "Uncompressed files are ignored. If a file is specified as '-', the\n" + "integrity of compressed data read from standard input is verified. Data\n" + "read from standard input must be all in the same compressed format. If\n" + "a file fails to decompress, does not exist, can't be opened, or is a\n" + "terminal, ztest continues verifying the rest of the files. A final\n" + "diagnostic is shown at verbosity level 1 or higher if any file fails the\n" + "test when testing multiple files.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches read standard input.\n" + "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nNote that error detection in the xz format is broken. First, some xz\n" + "files lack integrity information. Second, not all xz decompressors can\n" + "verify the integrity of all xz files. Third, section 2.1.1.2 'Stream\n" + "Flags' of the xz format specification allows xz decompressors to produce\n" + "garbage output without issuing any warning. 
Therefore, xz files can't\n" + "always be verified as reliably as files in the other formats can.\n" + "\nUsage: ztest [options] [files]\n" + "\nExit status is 0 if all compressed files verify OK, 1 if environmental\n" + "problems (file not found, invalid flags, I/O errors, etc), 2 if any\n" + "compressed file is corrupt or invalid.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -M, --format= process only the formats in \n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format= force the format given (bz2, gz, lz, xz)\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " --bz2= set compressor and options for bzip2 format\n" + " --gz= set compressor and options for gzip format\n" + " --lz= set compressor and options for lzip format\n" + " --xz= set compressor and options for xz format\n" ); + show_help_addr(); + }


/* Open 'input_filename' read-only (binary mode where O_BINARY exists).
   Returns the file descriptor, or a negative value after printing a
   diagnostic if the file can't be opened. */
int open_instream( const std::string & input_filename )
  {
  const int infd = open( input_filename.c_str(), O_RDONLY | O_BINARY );
  if( infd < 0 )
    show_file_error( input_filename.c_str(), "Can't open input file", errno );
  return infd;
  }


/* Verify the compressed data read from 'infd' (standard input).
   One child feeds the data (including the bytes already consumed by the
   format detection) into a pipe; a second child runs the compressor
   with '-t' reading from that pipe.
   If 'format_index' is negative, the format is detected from the data.
   Returns 0 if the data verify OK, 1 on environmental problems, 2 if the
   format is unknown, else the status returned by wait_for_child for the
   compressor. Both ends of the pipe are closed in this process on every
   path after the pipe is created. */
int ztest_stdin( const int infd, int format_index,
                 const std::vector< const char * > & ztest_args )
  {
  uint8_t magic_data[magic_buf_size];
  int magic_size = 0;
  if( format_index < 0 )
    format_index = test_format( infd, magic_data, &magic_size );
  const char * const compressor_name = get_compressor_name( format_index );
  if( !compressor_name )
    { show_error( "Unknown data format read from stdin." ); return 2; }
  int fda[2];				// pipe from feeder
  if( pipe( fda ) < 0 )
    { show_error( "Can't create pipe", errno ); return 1; }

  const pid_t pid = fork();
  if( pid == 0 )			// child1 (compressor feeder)
    {
    if( close( fda[0] ) != 0 ||
        !feed_data( "", infd, fda[1], magic_data, magic_size ) )
      _exit( 1 );
    if( close( fda[1] ) != 0 )
      { show_close_error(); _exit( 1 ); }
    _exit( 0 );
    }
  if( pid < 0 )				// parent
    {
    show_fork_error( "data feeder" );	// report first; close may change errno
    close( fda[0] ); close( fda[1] );	// don't leak the pipe
    return 1;
    }

  const pid_t pid2 = fork();
  if( pid2 == 0 )			// child2 (compressor)
    {
    if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
        close( fda[0] ) == 0 && close( fda[1] ) == 0 )
      {
      const std::vector< std::string > & compressor_args =
        get_compressor_args( format_index );
      const int size = compressor_args.size();
      const int size2 = ztest_args.size();
      // argv = { compressor, compressor_args..., ztest_args..., "-t", 0 }
      const char ** const argv = new const char *[size+size2+3];
      argv[0] = compressor_name;
      for( int i = 0; i < size; ++i )
        argv[i+1] = compressor_args[i].c_str();
      for( int i = 0; i < size2; ++i )
        argv[i+size+1] = ztest_args[i];
      argv[size+size2+1] = "-t";
      argv[size+size2+2] = 0;
      execvp( argv[0], (char **)argv );
      }
    show_exec_error( compressor_name );	// reached only if dup2/close/exec failed
    _exit( 1 );
    }
  if( pid2 < 0 )			// parent
    {
    show_fork_error( compressor_name );
    // With no reader left, the feeder fails on its next write instead of
    // staying blocked on a pipe that nobody will ever drain.
    close( fda[0] ); close( fda[1] );
    return 1;
    }

  close( fda[0] ); close( fda[1] );	// the children own the pipe now
  const bool isgzxz = ( format_index == fmt_gz || format_index == fmt_xz );
  int retval = wait_for_child( pid2, compressor_name, 1, isgzxz );
  // the status of the feeder only matters if the compressor succeeded
  if( retval == 0 && wait_for_child( pid, "data feeder" ) != 0 )
    retval = 1;
  return retval;
  }


/* Verify the compressed file 'input_filename'.
   'infd' is only used to detect the format (when 'format_index' is
   negative); the compressor is given the file name and opens the file
   itself. Files in an unknown format are ignored (return 0), and so are
   xz files if the xz compressor can't be run.
   Returns 0 if OK or ignored, 1 if fork fails, else the status returned
   by wait_for_child for the compressor. */
int ztest_file( const int infd, int format_index,
                const std::string & input_filename,
                const std::vector< const char * > & ztest_args )
  {
  static int disable_xz = -1;			// tri-state bool
  uint8_t magic_data[magic_buf_size];
  int magic_size = 0;
  if( format_index < 0 )
    format_index = test_format( infd, magic_data, &magic_size );
  const char * const compressor_name = get_compressor_name( format_index );
  if( !compressor_name )
    return 0;				// ignore this file
  if( format_index == fmt_xz )
    {
    if( disable_xz < 0 )		// probe the xz compressor only once
      {
      std::string command( compressor_name ); command += " -V > /dev/null 2>&1";
      disable_xz = ( std::system( command.c_str() ) != 0 );
      }
    if( disable_xz ) return 0;		// ignore this file if no xz installed
    }

  const pid_t pid = fork();

  if( pid == 0 )			// child (compressor)
    {
    const std::vector< std::string > & compressor_args =
      get_compressor_args( format_index );
    const int size = compressor_args.size();
    const int size2 = ztest_args.size();
    // argv = { compressor, compressor_args..., ztest_args...,
    //          "-t", "--", input_filename, 0 }
    const char ** const argv = new const char *[size+size2+5];
    argv[0] = compressor_name;
    for( int i = 0; i < size; ++i )
      argv[i+1] = compressor_args[i].c_str();
    for( int i = 0; i < size2; ++i )
      argv[i+size+1] = ztest_args[i];
    argv[size+size2+1] = "-t";
    argv[size+size2+2] = "--";		// file names may begin with '-'
    argv[size+size2+3] = input_filename.c_str();
    argv[size+size2+4] = 0;
    execvp( argv[0], (char **)argv );
    show_exec_error( compressor_name );
    _exit( 1 );
    }
  if( pid < 0 )				// parent
    { show_fork_error( compressor_name ); return 1; }

  const bool isgzxz = ( format_index == fmt_gz || format_index == fmt_xz );
  return wait_for_child( pid, compressor_name, 1, isgzxz );
  }

} // end namespace


int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + int format_index = -1; + int recursive = 0; // 1 = '-r', 2 = '-R' + std::list< std::string > filenames; + std::vector< const char * > ztest_args; // args to ztest, maybe empty + program_name = "ztest"; + invocation_name = ( argc > 0 ) ? 
argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'h', "help", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'h': show_help(); return 0; + case 'M': parse_format_list( arg ); break; + case 'N': break; + case 'O': format_index = parse_format_type( arg ); break; + case 'q': verbosity = -1; ztest_args.push_back( "-q" ); break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 'v': if( verbosity < 4 ) ++verbosity; + ztest_args.push_back( "-v" ); break; + case 'V': show_version(); return 0; + case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break; + default : internal_error( "uncaught option." 
); + } + } // end process options + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" ); + + std::string input_filename; + int files_tested = 0, failed_tests = 0; + int retval = 0; + bool error = false; + bool stdin_used = false; + while( next_filename( filenames, input_filename, error, recursive ) ) + { + int infd; + if( input_filename == "." ) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; input_filename = "-"; + } + else + { + infd = open_instream( input_filename ); + if( infd < 0 ) { error = true; continue; } + } + + if( isatty( infd ) ) // for example /dev/tty + { + show_file_error( input_filename == "-" ? "(stdin)" : input_filename.c_str(), + "I won't read compressed data from a terminal." ); + close( infd ); error = true; continue; + } + + int tmp; + if( infd == STDIN_FILENO ) + tmp = ztest_stdin( infd, format_index, ztest_args ); + else tmp = ztest_file( infd, format_index, input_filename, ztest_args ); + if( tmp > retval ) retval = tmp; + ++files_tested; if( tmp ) ++failed_tests; + + if( close( infd ) != 0 ) + { show_file_error( input_filename.c_str(), "Error closing input file", + errno ); error = true; } + } + + if( std::fclose( stdout ) != 0 ) // in case decompressor writes to stdout + { + show_error( "Error closing stdout", errno ); + error = true; + } + if( error && retval == 0 ) retval = 1; + if( failed_tests > 0 && verbosity >= 1 && files_tested > 1 ) + std::fprintf( stderr, "%s: warning: %d %s failed the test.\n", + program_name, failed_tests, + ( failed_tests == 1 ) ? 
"file" : "files" ); + return retval; + } diff --git a/zupdate.cc b/zupdate.cc new file mode 100644 index 0000000..a605f35 --- /dev/null +++ b/zupdate.cc @@ -0,0 +1,412 @@ +/* Zupdate - recompress bzip2, gzip, xz files to lzip format + Copyright (C) 2013-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__MSVCRT__) || defined(__OS2__) +#include +#endif + +#include "arg_parser.h" +#include "rc.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +namespace { + +#include "recursive.cc" + +void show_help() + { + std::printf( "zupdate recompresses files from bzip2, gzip, and xz formats to lzip\n" + "format. Each original is compared with the new file and then deleted.\n" + "Only regular files with standard file name extensions are recompressed,\n" + "other files are ignored. Compressed files are decompressed and then\n" + "recompressed on the fly; no temporary files are created. 
The lzip format\n" + "is chosen as destination because it is the most appropriate for\n" + "long-term data archiving.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches do nothing.\n" + "\nIf the lzip compressed version of a file already exists, the file is\n" + "skipped unless the option '--force' is given. In this case, if the\n" + "comparison with the existing lzip version fails, an error is returned\n" + "and the original file is not deleted. The operation of zupdate is meant\n" + "to be safe and not cause any data loss. Therefore, existing lzip\n" + "compressed files are never overwritten nor deleted.\n" + "\nThe names of the original files must have one of the following extensions:\n" + "'.bz2', '.gz', or '.xz', which are recompressed to '.lz';\n" + "'.tbz', '.tbz2', '.tgz', or '.txz', which are recompressed to '.tlz'.\n" + "\nUsage: zupdate [options] [files]\n" + "\nExit status is 0 if all the compressed files were successfully recompressed\n" + "(if needed), compared, and deleted (if requested). Non-zero otherwise.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -f, --force don't skip a file even if the .lz exists\n" + " -k, --keep keep (don't delete) input files\n" + " -l, --lzip-verbose pass one option -v to the lzip compressor\n" + " -M, --format= process only the formats in \n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " -0 .. 
-9 set compression level [default 9]\n" + " --bz2= set compressor and options for bzip2 format\n" + " --gz= set compressor and options for gzip format\n" + " --lz= set compressor and options for lzip format\n" + " --xz= set compressor and options for xz format\n" ); + show_help_addr(); + } + + +int cant_execute( const std::string & command, const int status ) + { + if( verbosity >= 0 ) + { + if( WIFEXITED( status ) ) + std::fprintf( stderr, "%s: Error executing '%s'. Exit status = %d\n", + program_name, command.c_str(), WEXITSTATUS( status ) ); + else + std::fprintf( stderr, "%s: Can't execute '%s'\n", + program_name, command.c_str() ); + } + return 1; + } + + +// Set permissions, owner, and times. +void set_permissions( const char * const rname, const struct stat & in_stats ) + { + bool warning = false; + const mode_t mode = in_stats.st_mode; + // chown will in many cases return with EPERM, which can be safely ignored. + if( chown( rname, in_stats.st_uid, in_stats.st_gid ) == 0 ) + { if( chmod( rname, mode ) != 0 ) warning = true; } + else + if( errno != EPERM || + chmod( rname, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) + warning = true; + struct utimbuf t; + t.actime = in_stats.st_atime; + t.modtime = in_stats.st_mtime; + if( utime( rname, &t ) != 0 ) warning = true; + if( warning && verbosity >= 2 ) + show_error( "Can't change output file attributes." ); + } + + + // Returns 0 for success, -1 for file skipped, 1 for error. 
+int zupdate_file( const std::string & name, const char * const lzip_name, + const std::vector< std::string > & lzip_args2, + const bool force, const bool keep_input_files, + const bool no_rcfile ) + { + static int disable_xz = -1; // tri-state bool + int format_index = -1; + std::string rname; // recompressed name + + const int eindex = extension_index( name ); // search extension + if( eindex >= 0 ) + { + format_index = extension_format( eindex ); + if( format_index == fmt_lz ) + { + if( verbosity >= 2 ) + std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", + program_name, name.c_str(), extension_from( eindex ) ); + return 0; // ignore this file + } + rname.assign( name, 0, name.size() - std::strlen( extension_from( eindex ) ) ); + rname += ( std::strcmp( extension_to( eindex ), ".tar" ) == 0 ) ? + ".tlz" : ".lz"; // keep combined extension + } + const char * const compressor_name = get_compressor_name( format_index ); + if( !compressor_name ) + { + if( verbosity >= 2 ) + std::fprintf( stderr, "%s: Unknown extension in file name '%s' -- ignored.\n", + program_name, name.c_str() ); + return 0; // ignore this file + } + + struct stat in_stats; + if( stat( name.c_str(), &in_stats ) != 0 ) // check input file + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't stat input file '%s': %s\n", + program_name, name.c_str(), std::strerror( errno ) ); + return 1; + } + if( !S_ISREG( in_stats.st_mode ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Input file '%s' is not a regular file.\n", + program_name, name.c_str() ); + return 1; + } + + struct stat st; // not used + const std::string rname2( rname + ".lz" ); // produced by lzip < 1.20 + const bool lz_exists = ( stat( rname.c_str(), &st ) == 0 ); + // don't modify an existing 'rname.lz' + const bool lz_lz_exists = ( stat( rname2.c_str(), &st ) == 0 ); + if( lz_exists && !force ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", + 
program_name, rname.c_str() ); + return -1; + } + + if( format_index == fmt_xz ) + { + if( disable_xz < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_xz = ( std::system( command.c_str() ) != 0 ); + } + if( disable_xz ) return 0; // ignore this file if no xz installed + } + + if( !lz_exists ) // recompress + { + if( verbosity >= 1 ) + std::fprintf( stderr, "Recompressing file '%s'\n", name.c_str() ); + int fda[2]; // pipe between decompressor and compressor + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return 1; } + + const pid_t pid = fork(); + if( pid == 0 ) // child1 (decompressor) + { + if( dup2( fda[1], STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+5]; + argv[0] = compressor_name; + for( int i = 0; i < size; ++i ) argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = "-cd"; + argv[size+2] = "--"; + argv[size+3] = name.c_str(); + argv[size+4] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( compressor_name ); + _exit( 1 ); + } + if( pid < 0 ) // parent + { show_fork_error( compressor_name ); return 1; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child2 (lzip compressor) + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & lzip_args = + get_compressor_args( fmt_lz ); + const int size = lzip_args.size(); + const int size2 = lzip_args2.size(); + const char ** const argv = new const char *[size+size2+5]; + argv[0] = lzip_name; + argv[1] = "-9"; + for( int i = 0; i < size; ++i ) argv[i+2] = lzip_args[i].c_str(); + for( int i = 0; i < size2; ++i ) argv[i+size+2] = lzip_args2[i].c_str(); + argv[size+size2+2] = "-o"; + argv[size+size2+3] = rname.c_str(); + argv[size+size2+4] 
= 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( lzip_name ); + _exit( 1 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( lzip_name ); return 1; } + + close( fda[0] ); close( fda[1] ); + int retval = wait_for_child( pid, compressor_name ); + int retval2 = wait_for_child( pid2, lzip_name ); + if( retval || retval2 ) + { if( !lz_lz_exists ) std::remove( rname2.c_str() ); // lzip < 1.20 + std::remove( rname.c_str() ); return 1; } + if( stat( rname.c_str(), &st ) != 0 && + ( lz_lz_exists || stat( rname2.c_str(), &st ) != 0 || + std::rename( rname2.c_str(), rname.c_str() ) != 0 ) ) + { show_file_error( rname.c_str(), "Error renaming output file", errno ); + return 1; } // lzip < 1.11 + set_permissions( rname.c_str(), in_stats ); + } + + { + if( lz_exists && verbosity >= 1 ) + std::fprintf( stderr, "Comparing file '%s'\n", name.c_str() ); + std::string zcmp_command( invocation_name ); + unsigned i = zcmp_command.size(); + while( i > 0 && zcmp_command[i-1] != '/' ) --i; + zcmp_command.resize( i ); zcmp_command.insert( zcmp_command.begin(), '\'' ); + zcmp_command += "zcmp' "; // '[dir/]zcmp' + if( no_rcfile ) zcmp_command += "-N "; + if( verbosity < 0 ) zcmp_command += "-q "; + zcmp_command += '\''; zcmp_command += name; + zcmp_command += "' '"; zcmp_command += rname; zcmp_command += '\''; + int status = std::system( zcmp_command.c_str() ); + if( status != 0 ) + { if( !lz_exists ) std::remove( rname.c_str() ); + return cant_execute( zcmp_command, status ); } + } + + if( !keep_input_files && std::remove( name.c_str() ) != 0 && errno != ENOENT ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't delete input file '%s': %s\n", + program_name, name.c_str(), std::strerror( errno ) ); + return 1; + } + return 0; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + int recursive = 0; // 1 = '-r', 2 = '-R' + std::list< std::string > filenames; + std::vector< 
std::string > lzip_args2; // args to lzip, maybe empty + bool force = false; + bool keep_input_files = false; + bool no_rcfile = false; + program_name = "zupdate"; + invocation_name = ( argc > 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { '0', 0, Arg_parser::no }, + { '1', 0, Arg_parser::no }, + { '2', 0, Arg_parser::no }, + { '3', 0, Arg_parser::no }, + { '4', 0, Arg_parser::no }, + { '5', 0, Arg_parser::no }, + { '6', 0, Arg_parser::no }, + { '7', 0, Arg_parser::no }, + { '8', 0, Arg_parser::no }, + { '9', 0, Arg_parser::no }, + { 'f', "force", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'k', "keep", Arg_parser::no }, + { 'l', "lzip-verbose", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + lzip_args2.push_back( "-" ); lzip_args2.back() += code; break; + case 'f': force = true; break; + case 'h': show_help(); return 0; + case 'k': keep_input_files = true; break; + case 'l': lzip_args2.push_back( "-v" ); break; + case 'M': parse_format_list( arg ); break; + case 'N': 
no_rcfile = true; break; + case 'q': verbosity = -1; lzip_args2.push_back( "-q" ); break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break; + default : internal_error( "uncaught option." ); + } + } // end process options + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + const char * const lzip_name = get_compressor_name( fmt_lz ); + if( !lzip_name ) + { show_error( "Missing name of compressor for lzip format." ); return 1; } + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() && recursive ) filenames.push_back( "." ); + + std::string input_filename; + int retval = 0; + bool error = false; + while( next_filename( filenames, input_filename, error, recursive, true ) ) + { + int tmp = zupdate_file( input_filename, lzip_name, lzip_args2, force, + keep_input_files, no_rcfile ); + if( tmp < 0 ) error = true; + if( tmp > retval ) retval = tmp; + if( tmp > 0 ) break; + } + if( error && retval == 0 ) retval = 1; + return retval; + } diff --git a/zutils.cc b/zutils.cc new file mode 100644 index 0000000..54090ff --- /dev/null +++ b/zutils.cc @@ -0,0 +1,283 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rc.h" +#include "zutils.h" + + +namespace { + +inline bool isvalid_ds( const uint8_t ds ) // lzip valid dictionary_size + { + enum { min_dictionary_size = 1 << 12, + max_dictionary_size = 1 << 29 }; + unsigned dictionary_size = ( 1 << ( ds & 0x1F ) ); + if( dictionary_size > min_dictionary_size ) + dictionary_size -= ( dictionary_size / 16 ) * ( ( ds >> 5 ) & 7 ); + return ( dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ); + } + + +/* Returns -1 if child not terminated, 2 in case of error, or exit status of + child process 'pid'. +*/ +int child_status( const pid_t pid, const char * const name ) + { + int status; + while( true ) + { + const int tmp = waitpid( pid, &status, WNOHANG ); + if( tmp == -1 && errno != EINTR ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Error checking status of '%s': %s\n", + program_name, name, std::strerror( errno ) ); + _exit( 2 ); + } + if( tmp == 0 ) return -1; // child not terminated + if( tmp == pid ) break; // child terminated + } + if( WIFEXITED( status ) ) return WEXITSTATUS( status ); + if( WIFSIGNALED( status ) && WTERMSIG( status ) == SIGPIPE ) return 0; + return 2; + } + +} // end namespace + + +/* Returns the number of bytes really read. + If (returned value < size) and (errno == 0), means EOF was reached. 
+*/ +int readblock( const int fd, uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = read( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n == 0 ) break; // EOF + else if( errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +/* Returns the number of bytes really written. + If (returned value < size), it is always an error. +*/ +int writeblock( const int fd, const uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = write( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n < 0 && errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +// Empty filename means stdin. +// +bool feed_data( const std::string & filename, const int infd, const int outfd, + const uint8_t * magic_data, const int magic_size ) + { + if( magic_size && writeblock( outfd, magic_data, magic_size ) != magic_size ) + { show_error( "Write error", errno ); return false; } + enum { buffer_size = 4096 }; + uint8_t buffer[buffer_size]; + while( true ) + { + const int size = readblock( infd, buffer, buffer_size ); + if( size != buffer_size && errno ) + { const char * const name = filename.empty() ? "-" : filename.c_str(); + show_file_error( name, "Read error", errno ); return false; } + if( size > 0 && writeblock( outfd, buffer, size ) != size ) + { show_error( "Write error", errno ); return false; } + if( size < buffer_size ) break; + } + return true; + } + + +bool good_status( const Children & children, const bool finished ) + { + bool error = false; + for( int i = 0; i < 2; ++i ) + { + const pid_t pid = children.pid[i]; + if( pid ) + { + const char * const name = + ( i == 0 ) ? 
"data feeder" : children.compressor_name; + // even if compressor finished, trailing data may remain in data feeder + if( i == 0 || !finished ) + { + const int tmp = child_status( pid, name ); + if( tmp < 0 ) // child not terminated + { kill( pid, SIGTERM ); wait_for_child( pid, name ); } + else if( tmp != 0 ) error = true; // child status != 0 + } + else + if( wait_for_child( pid, name ) != 0 ) error = true; + } + } + return !error; + } + + +bool set_data_feeder( const std::string & filename, int * const infdp, + Children & children, int format_index ) + { + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + if( format_index < 0 ) + format_index = test_format( *infdp, magic_data, &magic_size ); + children.compressor_name = get_compressor_name( format_index ); + + if( children.compressor_name ) // compressed + { + int fda[2]; // pipe from feeder + int fda2[2]; // pipe from compressor + if( pipe( fda ) < 0 || pipe( fda2 ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const int old_infd = *infdp; + *infdp = fda2[0]; + const pid_t pid = fork(); + if( pid == 0 ) // child 1 (compressor feeder) + { + if( close( fda[0] ) != 0 || + close( fda2[0] ) != 0 || close( fda2[1] ) != 0 || + !feed_data( filename, old_infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) + { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child 2 (compressor) + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + dup2( fda2[1], STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 && + close( fda2[0] ) == 0 && close( fda2[1] ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+3]; + argv[0] = children.compressor_name; + for( int i = 0; i < size; 
++i ) + argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = ( verbosity >= 0 ) ? "-d" : "-dq"; + argv[size+2] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( children.compressor_name ); + _exit( 2 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( children.compressor_name ); return false; } + + close( fda[0] ); close( fda[1] ); close( fda2[1] ); + children.pid[0] = pid; + children.pid[1] = pid2; + } + else // uncompressed + { + int fda[2]; // pipe from feeder + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const int old_infd = *infdp; + *infdp = fda[0]; + const pid_t pid = fork(); + if( pid == 0 ) // child (feeder) + { + if( close( fda[0] ) != 0 || + !feed_data( filename, old_infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) + { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + close( fda[1] ); + children.pid[0] = pid; + children.pid[1] = 0; + } + return true; + } + + +// Returns format index or -1 if uncompressed +// +int test_format( const int infd, uint8_t magic_data[], + int * const magic_sizep ) + { + enum { bzip2_magic_size = 3, + gzip_magic_size = 2, + lzip_magic_size = 5, + xz_magic_size = 5 }; + const uint8_t bzip2_magic[bzip2_magic_size] = + { 0x42, 0x5A, 0x68 }; // "BZh" + const uint8_t gzip_magic[gzip_magic_size] = + { 0x1F, 0x8B }; + const uint8_t lzip_magic[lzip_magic_size] = + { 0x4C, 0x5A, 0x49, 0x50, 0x01 }; // "LZIP\001" + const uint8_t xz_magic[xz_magic_size] = + { 0xFD, 0x37, 0x7A, 0x58, 0x5A }; // 0xFD, "7zXZ" + + *magic_sizep = readblock( infd, magic_data, magic_buf_size ); + if( *magic_sizep == magic_buf_size ) + { + if( std::memcmp( magic_data, bzip2_magic, bzip2_magic_size ) == 0 && + magic_data[3] >= '1' && magic_data[3] <= '9' && + std::memcmp( magic_data + 4, "1AY&SY", 6 ) == 0 ) + return fmt_bz2; + if( std::memcmp( magic_data, gzip_magic, gzip_magic_size ) 
== 0 ) + return fmt_gz; + if( std::memcmp( magic_data, lzip_magic, lzip_magic_size ) == 0 && + isvalid_ds( magic_data[lzip_magic_size] ) ) + return fmt_lz; + if( std::memcmp( magic_data, xz_magic, xz_magic_size ) == 0 ) + return fmt_xz; + } + return -1; + } diff --git a/zutils.h b/zutils.h new file mode 100644 index 0000000..064af51 --- /dev/null +++ b/zutils.h @@ -0,0 +1,37 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/

// Read up to 'size' bytes from 'fd' into 'buf', retrying reads interrupted
// by a signal. Returns the number of bytes read; a short count with
// errno == 0 means EOF.
int readblock( const int fd, uint8_t * const buf, const int size );
// Write 'size' bytes from 'buf' to 'fd', retrying writes interrupted by a
// signal. Returns the number of bytes written; a short count is an error.
int writeblock( const int fd, const uint8_t * const buf, const int size );
// Write 'magic_size' bytes of 'magic_data' to 'outfd', then copy 'infd' to
// 'outfd' until EOF. An empty 'filename' means stdin (shown as "-").
bool feed_data( const std::string & filename, const int infd, const int outfd,
                const uint8_t * magic_data, const int magic_size );

// Child processes started by set_data_feeder. A pid of 0 means no child.
struct Children
  {
  const char * compressor_name;	// null if the data is not compressed
  pid_t pid[2];			// data feeder, compressor
  };
// Returns true if no child in 'children' terminated with non-zero status.
// 'finished' selects waiting for the compressor instead of polling it.
bool good_status( const Children & children, const bool finished );
// Start the data feeder (and the compressor, if one applies) for '*infdp'
// and replace '*infdp' with the descriptor to read the data from.
// A negative 'format_index' requests format detection.
bool set_data_feeder( const std::string & filename, int * const infdp,
                      Children & children, int format_index );

enum { magic_buf_size = 10 };	// >= longest extended magic (bzip2)

// Returns format index or -1 if uncompressed
//
int test_format( const int infd, uint8_t magic_data[],
                 int * const magic_sizep );
diff --git a/zutilsrc b/zutilsrc
new file mode 100644
index 0000000..04a1d69
--- /dev/null
+++ b/zutilsrc
@@ -0,0 +1,16 @@
#
# Runtime Configuration file for Zutils
#
# Zutils looks for this file in:
# 1 - $HOME/.zutilsrc
# 2 - ${sysconfdir}/zutilsrc

# This file sets the compressor and options to be used for each format.
# The command line options override compressors specified in this file.
# Syntax: <format> = <compressor> [options]
# Uncomment each line you want to take effect.

# bz2 = lbzip2 -n2
# gz = pigz -p2
# lz = plzip -n2
# xz = pixz -p2
-- cgit v1.2.3