diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 13:35:06 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 13:35:06 +0000 |
commit | f9be52fa859528b0439964589d03d85796275cdb (patch) | |
tree | 174763c6a2c37083bf3e81c8a9aca0b2eb40c9cc | |
parent | Initial commit. (diff) | |
download | zutils-f9be52fa859528b0439964589d03d85796275cdb.tar.xz zutils-f9be52fa859528b0439964589d03d85796275cdb.zip |
Adding upstream version 1.10. (refs: upstream/1.10, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r-- | AUTHORS | 1 | ||||
-rw-r--r-- | COPYING | 338 | ||||
-rw-r--r-- | ChangeLog | 187 | ||||
-rw-r--r-- | INSTALL | 81 | ||||
-rw-r--r-- | Makefile.in | 238 | ||||
-rw-r--r-- | NEWS | 9 | ||||
-rw-r--r-- | README | 47 | ||||
-rw-r--r-- | arg_parser.cc | 196 | ||||
-rw-r--r-- | arg_parser.h | 99 | ||||
-rwxr-xr-x | configure | 207 | ||||
-rw-r--r-- | doc/zcat.1 | 103 | ||||
-rw-r--r-- | doc/zcmp.1 | 90 | ||||
-rw-r--r-- | doc/zdiff.1 | 121 | ||||
-rw-r--r-- | doc/zgrep.1 | 153 | ||||
-rw-r--r-- | doc/ztest.1 | 82 | ||||
-rw-r--r-- | doc/zupdate.1 | 91 | ||||
-rw-r--r-- | doc/zutils.info | 838 | ||||
-rw-r--r-- | doc/zutils.texi | 882 | ||||
-rw-r--r-- | rc.cc | 411 | ||||
-rw-r--r-- | rc.h | 62 | ||||
-rw-r--r-- | recursive.cc | 109 | ||||
-rwxr-xr-x | testsuite/check.sh | 560 | ||||
-rw-r--r-- | testsuite/test.txt | 676 | ||||
-rw-r--r-- | testsuite/test.txt.tar | bin | 0 -> 40960 bytes | |||
-rw-r--r-- | testsuite/test_bad_crc.lz | bin | 0 -> 7376 bytes | |||
-rw-r--r-- | testsuite/zcat_vs.dat | 68 | ||||
-rw-r--r-- | testsuite/zero_bad_crc.gz | bin | 0 -> 20 bytes | |||
-rw-r--r-- | testsuite/zero_bad_crc.lz | bin | 0 -> 36 bytes | |||
-rw-r--r-- | zcat.cc | 386 | ||||
-rw-r--r-- | zcatgrep.cc | 59 | ||||
-rw-r--r-- | zcmp.cc | 471 | ||||
-rw-r--r-- | zcmpdiff.cc | 70 | ||||
-rw-r--r-- | zdiff.cc | 440 | ||||
-rw-r--r-- | zegrep.in | 3 | ||||
-rw-r--r-- | zfgrep.in | 3 | ||||
-rw-r--r-- | zgrep.cc | 401 | ||||
-rw-r--r-- | ztest.cc | 335 | ||||
-rw-r--r-- | zupdate.cc | 412 | ||||
-rw-r--r-- | zutils.cc | 283 | ||||
-rw-r--r-- | zutils.h | 37 | ||||
-rw-r--r-- | zutilsrc | 16 |
41 files changed, 8565 insertions, 0 deletions
@@ -0,0 +1 @@ +The zutils were written by Antonio Diaz Diaz. @@ -0,0 +1,338 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) <year> <name of author> + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. 
If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..cdbc3da --- /dev/null +++ b/ChangeLog @@ -0,0 +1,187 @@ +2021-01-05 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.10 released. + * zdiff.cc (set_fifonames): Encode pid in little endian order. + * zupdate.cc (zupdate_file): Fix a portability issue with Solaris 10. + * zutils.texi: Document that 'zgrep -L' fails with GNU grep 3.2 to 3.4. + * check.sh: Test empty input files with all tools except zupdate. + +2020-06-27 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.9 released. + * zcmp.cc, zdiff.cc: Read standard input only if requested. + * zdiff.cc (main): Pass options '-W' and '-y' to diff. + * zutils.cc (test_format): Detect bzip2 and lzip files better. + * ztest.cc (main): Continue testing if any input file is a terminal. + If verbosity >= 1, print number of files that failed the test. + * zcat.cc, zgrep.cc, ztest.cc (main): Check return value of close(infd). + * zutils.cc (good_status): Ignore trailing data remaining in feeder. + * zupdate.cc (zupdate_file): Support new and old lzip option '-o'. + Keep combined extensions: tgz, tbz, tbz2, txz --> tlz. + Quote file names in zcmp_command to allow file names with spaces. + * *.cc (main): Set a valid invocation_name even if argc == 0. + * zutils.texi: Improve descriptions of zcat, zcmp, and zdiff. + +2019-01-01 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.8 released. + * zcat.cc: Fix a buffer overflow on outbuf when '-v' is used. + * zcat.cc (cat): A canary byte has been added to outbuf. + * New option '-R, --dereference-recursive'. + * Option '-r, --recursive' now skips symlinks. + * If no files and recursive, examine current working directory. 
+ * recursive.cc (test_full_name): Detect directory loops. + * recursive.cc: Ignore directories if not --recursive. + * recursive.cc: Remove extra trailing slashes from directory args. + * zcatgrep.cc (open_instream): Show correct errno. + * zutils.cc (good_status): Wait for killed child. + * Test and document continuation or exit of zcat, zgrep, ztest, + and zupdate in case of error. + * configure: Accept appending to CXXFLAGS, 'CXXFLAGS+=OPTIONS'. + +2018-02-13 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.7 released. + * zgrep.cc (main): Pass option '--color' to grep. + * check.sh: Add new tests for zgrep. + +2017-04-05 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.6 released. + * zcmp.cc: Accept 'B' suffix in '--ignore-initial=1kB:1234B'. + * zutils.cc (feed_data): Show input file name in error messages. + +2016-05-15 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.5 released. + * zupdate.cc (zupdate_file): Pass '-q' to zcmp if verbosity < 0. + * zcat.cc, zgrep.cc, ztest.cc (main): Don't use stdin more than once. + * zdiff.cc (set_fifonames): Use '_' if both names are different. + * configure: Avoid warning on some shells when testing for g++. + * Makefile.in: Detect the existence of install-info. + * check.sh: A POSIX shell is required to run the tests. + +2015-05-29 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.4 released. + * Option '--format' has been renamed to '-O, --force-format'. + * Add new option '-M, --format=<list>' to all utilities. + * zgrep.cc (main): Pass '-e' to grep if pattern begins with '-'. + * Makefile.in: New targets 'install*-compress'. + +2014-08-30 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.3 released. + * check.sh: Fix two values of expected exit status. + * zutils.texi: Document that '--format' does not verify format. + * Add two missing #includes. + * Change license to GPL version 2 or later. + +2014-02-01 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.2 released. + * New utility; zupdate. 
+ * Remove zutils executable. Utils are now independent executables. + * zgrep.cc: Fix the exit status returned on error. + * zutils.texinfo: Rename to zutils.texi. + +2013-08-02 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.1 released. + * Add options '--bz2', '--gz', '--lz', and '--xz' to all utilities. + * Add runtime configuration file 'zutilsrc'. + * New function 'good_status' checks exit status of all children. + * Fix all uses of decompressed/uncompressed in the documentation. + +2013-05-31 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.0 released. + * Add new option '--format' to all utilities. + * main.cc (main): Make 'grep_show_name' tri-state so that file + name is not prefixed to output by default when searching one + file and '--recursive' has not been selected. + * zgrep.cc: Fix output of option '-L' (it behaved like '-l'). + * zcmp.cc: Fix deadlock when option '-n' is used. + * zdiff.cc (set_data_feeder): Call compressor with option '-q' + only if verbosity < 0. + * zutils.cc (set_data_feeder): Likewise. + * Change quote characters in messages as advised by GNU Standards. + * configure: Options now accept a separate argument. + Rename 'datadir' to 'datarootdir'. Ignore environment variables. + * Makefile.in: New target 'install-bin'. + * Use 'setmode' instead of '_setmode' on Windows and OS/2. + * zcat.cc (Line_number): Fix a portability issue with Solaris 9. + * INSTALL: Document installing zutils along with GNU gzip. + +2011-01-11 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.9 released. + * configure: New options 'DIFF' and 'GREP'. + * zcmp.cc: Fix deadlock when files differ. + * zgrep.cc: Fix deadlock when binary file matches. + +2010-11-15 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.8 released. + * main.cc: New options '--zcat', '--zgrep', and '--ztest'. + * zcat.cc: New file implementing zcat+cat functionality in C++. + * zcmp.cc: New file implementing zcmp+cmp functionality in C++. + * doc/zcmp.1: New file. 
+ * Remove files zcmp.in, zdiff.in. + * zdiff.cc: New file implementing zdiff functionality in C++. + * zgrep.cc: New file implementing zgrep functionality in C++. + * All mentions to zegrep and zfgrep have been removed from the + documentation because egrep and fgrep are deprecated. + * ztest.cc: New file implementing ztest functionality in C++. + * Makefile.in: Add quotes to directory names. + * check.sh: Use 'test.txt' instead of 'COPYING' for testing. + * Remove environment safeguards from configure as requested by + Richard Stallman. Now environment variables affect configure. + +2009-10-21 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.7 released. + * New utility; ztest. + * zcat.in: New option '-r, --recursive'. + +2009-10-05 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.6 released. + * zcat.in, zgrep.in: Remove again default compressor. Format of + data read from stdin is now automatically detected. + * Makefile.in: Add option '--name' to help2man invocation. + +2009-10-01 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.5 released. + * zcat.in, zgrep.in: Read again data from stdin. + * Add again default compressor for stdin only. + +2009-09-17 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.4 released. + * Add two new utilities; zegrep and zfgrep. + * Add zutils executable which recognizes file formats. + +2009-08-28 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.3 released. + * Remove default compressor. + * zcat.in, zgrep.in: Don't read data from stdin. + * Update home page and mailing list addresses. + +2009-08-13 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.2 released. + * Add support for xz. + +2009-08-07 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.1 released. + + +Copyright (C) 2009-2021 Antonio Diaz Diaz. + +This file is a collection of facts, and thus it is not copyrightable, +but just in case, you have unlimited permission to copy, distribute, and +modify it. 
@@ -0,0 +1,81 @@ +Requirements +------------ +You will need a C++11 compiler. (gcc 3.3.6 or newer is recommended). +I use gcc 6.1.0 and 4.1.2, but the code should compile with any standards +compliant compiler. +Gcc is available at http://gcc.gnu.org. + +POSIX compliant versions of diff and grep are required for zdiff and zgrep. + +(Option -L of zgrep fails (prints wrong results, returns wrong status, and +even hangs) when using GNU grep versions 3.2 to 3.4 inclusive because of a +wrong change in the exit status of grep, which was reverted in GNU grep 3.5). + +Compressors for bzip2, gzip and lzip formats are required to run the tests. + +If you are installing zutils along with GNU gzip and want to keep the +gzip scripts, the recommended method is to configure gzip as follows: + + ./configure --program-transform-name='s/^z/gz/' + +This renames, at installation time, the gzip scripts and man pages to +'gzcat', 'gzcat.1', etc, avoiding the name clashing with the programs +and man pages from zutils. + + +Procedure +--------- +1. Unpack the archive if you have not done so already: + + tar -xf zutils[version].tar.lz +or + lzip -cd zutils[version].tar.lz | tar -xf - + +This creates the directory ./zutils[version] containing the source from +the main archive. + +2. Change to zutils directory and run configure. + (Try 'configure --help' for usage instructions). + + cd zutils[version] + ./configure + +3. Run make. + + make + +4. Optionally, type 'make check' to run the tests that come with zutils. + +5. Type 'make install' to install the programs and any data files and + documentation. + + Or type 'make install-compress', which additionally compresses the + info manual and the man pages after installation. + (Installing compressed docs may become the default in the future). + + You can install only the programs, the info manual, or the man pages by + typing 'make install-bin', 'make install-info', or 'make install-man' + respectively. 
+ + +Another way +----------- +You can also compile zutils into a separate directory. +To do this, you must use a version of 'make' that supports the variable +'VPATH', such as GNU 'make'. 'cd' to the directory where you want the +object files and executables to go and run the 'configure' script. +'configure' automatically checks for the source code in '.', in '..', and +in the directory that 'configure' is in. + +'configure' recognizes the option '--srcdir=DIR' to control where to +look for the sources. Usually 'configure' can determine that directory +automatically. + +After running 'configure', you can run 'make' and 'make install' as +explained above. + + +Copyright (C) 2009-2021 Antonio Diaz Diaz. + +This file is free documentation: you have unlimited permission to copy, +distribute, and modify it. diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..55a974e --- /dev/null +++ b/Makefile.in @@ -0,0 +1,238 @@ + +DISTNAME = $(pkgname)-$(pkgversion) +INSTALL = install +INSTALL_PROGRAM = $(INSTALL) -m 755 +INSTALL_SCRIPT = $(INSTALL) -m 755 +INSTALL_DATA = $(INSTALL) -m 644 +INSTALL_DIR = $(INSTALL) -d -m 755 +SHELL = /bin/sh +CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 + +objs = arg_parser.o rc.o zutils.o \ + zcat.o zcmp.o zdiff.o zgrep.o ztest.o zupdate.o +zcat_objs = arg_parser.o rc.o zutils.o zcat.o +zcmp_objs = arg_parser.o rc.o zutils.o zcmp.o +zdiff_objs = arg_parser.o rc.o zutils.o zdiff.o +zgrep_objs = arg_parser.o rc.o zutils.o zgrep.o +ztest_objs = arg_parser.o rc.o zutils.o ztest.o +zupdate_objs = arg_parser.o rc.o zupdate.o +programs = zcat zcmp zdiff zgrep ztest zupdate +scripts = zegrep zfgrep + + +.PHONY : all install install-bin install-info install-man \ + install-strip install-compress install-strip-compress \ + install-bin-strip install-info-compress install-man-compress \ + uninstall uninstall-bin uninstall-info uninstall-man \ + doc info man check dist clean distclean + +all : $(programs) 
$(scripts) + +zcat : $(zcat_objs) + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zcat_objs) + +zcmp : $(zcmp_objs) + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zcmp_objs) + +zdiff : $(zdiff_objs) + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zdiff_objs) + +zegrep : zegrep.in + cat $(VPATH)/zegrep.in > $@ + chmod a+x zegrep + +zfgrep : zfgrep.in + cat $(VPATH)/zfgrep.in > $@ + chmod a+x zfgrep + +zgrep : $(zgrep_objs) + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zgrep_objs) + +ztest : $(ztest_objs) + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(ztest_objs) + +zupdate : $(zupdate_objs) + $(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ $(zupdate_objs) + +rc.o : rc.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -DSYSCONFDIR=\"$(sysconfdir)\" -c -o $@ $< + +zdiff.o : zdiff.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DDIFF=\"$(DIFF)\" -c -o $@ $< + +zgrep.o : zgrep.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DGREP=\"$(GREP)\" -c -o $@ $< + +%.o : %.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< + +$(objs) : Makefile +$(scripts) : Makefile +arg_parser.o : arg_parser.h +rc.o : arg_parser.h rc.h +zcat.o : arg_parser.h rc.h zutils.h recursive.cc zcatgrep.cc +zcmp.o : arg_parser.h rc.h zutils.h zcmpdiff.cc +zdiff.o : arg_parser.h rc.h zutils.h zcmpdiff.cc +zgrep.o : arg_parser.h rc.h zutils.h recursive.cc zcatgrep.cc +ztest.o : arg_parser.h rc.h zutils.h recursive.cc +zupdate.o : arg_parser.h rc.h recursive.cc +zutils.o : rc.h zutils.h + + +doc : info man + +info : $(VPATH)/doc/$(pkgname).info + +$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi + cd $(VPATH)/doc && makeinfo $(pkgname).texi + +man : $(VPATH)/doc/zcat.1 $(VPATH)/doc/zcmp.1 $(VPATH)/doc/zdiff.1 \ + $(VPATH)/doc/zgrep.1 $(VPATH)/doc/ztest.1 $(VPATH)/doc/zupdate.1 + +$(VPATH)/doc/zcat.1 : zcat + help2man -n 'decompress and concatenate files to standard output' \ + -o $@ --no-info ./zcat + +$(VPATH)/doc/zcmp.1 : zcmp + help2man -n 'decompress and compare two files byte by byte' \ + -o $@ --no-info ./zcmp + +$(VPATH)/doc/zdiff.1 : zdiff + 
help2man -n 'decompress and compare two files line by line' \ + -o $@ --no-info ./zdiff + +$(VPATH)/doc/zgrep.1 : zgrep + help2man -n 'search compressed files for a regular expression' \ + -o $@ --no-info ./zgrep + +$(VPATH)/doc/ztest.1 : ztest + help2man -n 'verify the integrity of compressed files' \ + -o $@ --no-info ./ztest + +$(VPATH)/doc/zupdate.1 : zupdate + help2man -n 'recompress bzip2, gzip, xz files to lzip format' \ + -o $@ --no-info ./zupdate + +Makefile : $(VPATH)/configure $(VPATH)/Makefile.in + ./config.status + +check : all + @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion) + +install : install-bin install-info install-man +install-strip : install-bin-strip install-info install-man +install-compress : install-bin install-info-compress install-man-compress +install-strip-compress : install-bin-strip install-info-compress install-man-compress + +install-bin : all + if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi + $(INSTALL_PROGRAM) ./zcat "$(DESTDIR)$(bindir)/zcat" + $(INSTALL_PROGRAM) ./zcmp "$(DESTDIR)$(bindir)/zcmp" + $(INSTALL_PROGRAM) ./zdiff "$(DESTDIR)$(bindir)/zdiff" + $(INSTALL_SCRIPT) ./zegrep "$(DESTDIR)$(bindir)/zegrep" + $(INSTALL_SCRIPT) ./zfgrep "$(DESTDIR)$(bindir)/zfgrep" + $(INSTALL_PROGRAM) ./zgrep "$(DESTDIR)$(bindir)/zgrep" + $(INSTALL_PROGRAM) ./ztest "$(DESTDIR)$(bindir)/ztest" + $(INSTALL_PROGRAM) ./zupdate "$(DESTDIR)$(bindir)/zupdate" + if [ ! -e "$(DESTDIR)$(sysconfdir)/$(pkgname)rc" ] ; then \ + if [ ! -d "$(DESTDIR)$(sysconfdir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(sysconfdir)" ; fi ; \ + $(INSTALL_DATA) $(VPATH)/$(pkgname)rc "$(DESTDIR)$(sysconfdir)/$(pkgname)rc" ; \ + fi + +install-bin-strip : all + $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin + +install-info : + if [ ! 
-d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* + $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi + +install-info-compress : install-info + lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info" + +install-man : + if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi + -rm -f "$(DESTDIR)$(mandir)/man1/zcat.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zcmp.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zdiff.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zgrep.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/ztest.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zupdate.1"* + $(INSTALL_DATA) $(VPATH)/doc/zcat.1 "$(DESTDIR)$(mandir)/man1/zcat.1" + $(INSTALL_DATA) $(VPATH)/doc/zcmp.1 "$(DESTDIR)$(mandir)/man1/zcmp.1" + $(INSTALL_DATA) $(VPATH)/doc/zdiff.1 "$(DESTDIR)$(mandir)/man1/zdiff.1" + $(INSTALL_DATA) $(VPATH)/doc/zgrep.1 "$(DESTDIR)$(mandir)/man1/zgrep.1" + $(INSTALL_DATA) $(VPATH)/doc/ztest.1 "$(DESTDIR)$(mandir)/man1/ztest.1" + $(INSTALL_DATA) $(VPATH)/doc/zupdate.1 "$(DESTDIR)$(mandir)/man1/zupdate.1" + +install-man-compress : install-man + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zcat.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zcmp.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zdiff.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zgrep.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/ztest.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zupdate.1" + +uninstall : uninstall-man uninstall-info uninstall-bin + +uninstall-bin : + -rm -f "$(DESTDIR)$(bindir)/zcat" + -rm -f "$(DESTDIR)$(bindir)/zcmp" + -rm -f "$(DESTDIR)$(bindir)/zdiff" + -rm -f "$(DESTDIR)$(bindir)/zegrep" + -rm -f "$(DESTDIR)$(bindir)/zfgrep" + -rm -f "$(DESTDIR)$(bindir)/zgrep" + -rm -f "$(DESTDIR)$(bindir)/ztest" + -rm -f "$(DESTDIR)$(bindir)/zupdate" + -rm -f 
"$(DESTDIR)$(sysconfdir)/$(pkgname)rc" + +uninstall-info : + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* + +uninstall-man : + -rm -f "$(DESTDIR)$(mandir)/man1/zcat.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zcmp.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zdiff.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zgrep.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/ztest.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zupdate.1"* + +dist : doc + ln -sf $(VPATH) $(DISTNAME) + tar -Hustar --owner=root --group=root -cvf $(DISTNAME).tar \ + $(DISTNAME)/AUTHORS \ + $(DISTNAME)/COPYING \ + $(DISTNAME)/ChangeLog \ + $(DISTNAME)/INSTALL \ + $(DISTNAME)/Makefile.in \ + $(DISTNAME)/NEWS \ + $(DISTNAME)/README \ + $(DISTNAME)/configure \ + $(DISTNAME)/doc/*.1 \ + $(DISTNAME)/doc/$(pkgname).info \ + $(DISTNAME)/doc/$(pkgname).texi \ + $(DISTNAME)/$(pkgname)rc \ + $(DISTNAME)/*.h \ + $(DISTNAME)/*.cc \ + $(DISTNAME)/z*.in \ + $(DISTNAME)/testsuite/check.sh \ + $(DISTNAME)/testsuite/test.txt \ + $(DISTNAME)/testsuite/test.txt.tar \ + $(DISTNAME)/testsuite/zcat_vs.dat \ + $(DISTNAME)/testsuite/test_bad_crc.lz \ + $(DISTNAME)/testsuite/zero_bad_crc.lz \ + $(DISTNAME)/testsuite/zero_bad_crc.gz + rm -f $(DISTNAME) + lzip -v -9 $(DISTNAME).tar + +clean : + -rm -f $(programs) $(scripts) $(objs) + +distclean : clean + -rm -f Makefile config.status *.tar *.tar.lz @@ -0,0 +1,9 @@ +Changes in version 1.10: + +A portability issue with Solaris 10 has been fixed. + +It has been documented in the manual that 'zgrep -L' fails with GNU grep +versions 3.2 to 3.4 inclusive because of a wrong change reverted in GNU grep +3.5. + +'make check' now tests empty input files with all tools except zupdate. @@ -0,0 +1,47 @@ +Description + +Zutils is a collection of utilities able to process any combination of +compressed and uncompressed files transparently. 
If any file given, +including standard input, is compressed, its decompressed content is used. +Compressed files are decompressed on the fly; no temporary files are +created. + +These utilities are not wrapper scripts but safer and more efficient C++ +programs. In particular the option '--recursive' is very efficient in +those utilities supporting it. + +The utilities provided are zcat, zcmp, zdiff, zgrep, ztest, and zupdate. +The formats supported are bzip2, gzip, lzip, and xz. +Zutils uses external compressors. The compressor to be used for each format +is configurable at runtime. + +zcat, zcmp, zdiff, and zgrep are improved replacements for the shell scripts +provided by GNU gzip. ztest is unique to zutils. zupdate is similar to +gzip's znew. + +NOTE: Bzip2 and lzip provide well-defined values of exit status, which makes +them safe to use with zutils. Gzip and xz may return ambiguous warning +values, making them less reliable back ends for zutils. + +FORMAT NOTE 1: The option '--format' allows the processing of a subset +of formats in recursive mode and when trying compressed file names: +'zgrep foo -r --format=bz2,lz somedir somefile.tar'. + +FORMAT NOTE 2: If the option '--force-format' is given, the files are +passed to the corresponding decompressor without verifying their format, +allowing for example the processing of compress'd (.Z) files with gzip: +'zcmp --force-format=gz file.Z file.lz'. + +LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have +been compressed. Decompressed is used to refer to data which have undergone +the process of decompression. + + +Copyright (C) 2009-2021 Antonio Diaz Diaz. + +This file is free documentation: you have unlimited permission to copy, +distribute, and modify it. + +The file Makefile.in is a data file used by configure to produce the +Makefile. It has the same copyright owner and permissions as configure +itself. 
diff --git a/arg_parser.cc b/arg_parser.cc new file mode 100644 index 0000000..2e40a13 --- /dev/null +++ b/arg_parser.cc @@ -0,0 +1,196 @@ +/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) + Copyright (C) 2006-2021 Antonio Diaz Diaz. + + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#include <cstring> +#include <string> +#include <vector> + +#include "arg_parser.h" + + +bool Arg_parser::parse_long_option( const char * const opt, const char * const arg, + const Option options[], int & argind ) + { + unsigned len; + int index = -1; + bool exact = false, ambig = false; + + for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; + + // Test all long options for either exact match or abbreviated matches. 
+ for( int i = 0; options[i].code != 0; ++i ) + if( options[i].name && std::strncmp( options[i].name, &opt[2], len ) == 0 ) + { + if( std::strlen( options[i].name ) == len ) // Exact match found + { index = i; exact = true; break; } + else if( index < 0 ) index = i; // First nonexact match found + else if( options[index].code != options[i].code || + options[index].has_arg != options[i].has_arg ) + ambig = true; // Second or later nonexact match found + } + + if( ambig && !exact ) + { + error_ = "option '"; error_ += opt; error_ += "' is ambiguous"; + return false; + } + + if( index < 0 ) // nothing found + { + error_ = "unrecognized option '"; error_ += opt; error_ += '\''; + return false; + } + + ++argind; + data.push_back( Record( options[index].code ) ); + + if( opt[len+2] ) // '--<long_option>=<argument>' syntax + { + if( options[index].has_arg == no ) + { + error_ = "option '--"; error_ += options[index].name; + error_ += "' doesn't allow an argument"; + return false; + } + if( options[index].has_arg == yes && !opt[len+3] ) + { + error_ = "option '--"; error_ += options[index].name; + error_ += "' requires an argument"; + return false; + } + data.back().argument = &opt[len+3]; + return true; + } + + if( options[index].has_arg == yes ) + { + if( !arg || !arg[0] ) + { + error_ = "option '--"; error_ += options[index].name; + error_ += "' requires an argument"; + return false; + } + ++argind; data.back().argument = arg; + return true; + } + + return true; + } + + +bool Arg_parser::parse_short_option( const char * const opt, const char * const arg, + const Option options[], int & argind ) + { + int cind = 1; // character index in opt + + while( cind > 0 ) + { + int index = -1; + const unsigned char c = opt[cind]; + + if( c != 0 ) + for( int i = 0; options[i].code; ++i ) + if( c == options[i].code ) + { index = i; break; } + + if( index < 0 ) + { + error_ = "invalid option -- '"; error_ += c; error_ += '\''; + return false; + } + + data.push_back( Record( c ) ); + 
if( opt[++cind] == 0 ) { ++argind; cind = 0; } // opt finished + + if( options[index].has_arg != no && cind > 0 && opt[cind] ) + { + data.back().argument = &opt[cind]; ++argind; cind = 0; + } + else if( options[index].has_arg == yes ) + { + if( !arg || !arg[0] ) + { + error_ = "option requires an argument -- '"; error_ += c; + error_ += '\''; + return false; + } + data.back().argument = arg; ++argind; cind = 0; + } + } + return true; + } + + +Arg_parser::Arg_parser( const int argc, const char * const argv[], + const Option options[], const bool in_order ) + { + if( argc < 2 || !argv || !options ) return; + + std::vector< const char * > non_options; // skipped non-options + int argind = 1; // index in argv + + while( argind < argc ) + { + const unsigned char ch1 = argv[argind][0]; + const unsigned char ch2 = ch1 ? argv[argind][1] : 0; + + if( ch1 == '-' && ch2 ) // we found an option + { + const char * const opt = argv[argind]; + const char * const arg = ( argind + 1 < argc ) ? argv[argind+1] : 0; + if( ch2 == '-' ) + { + if( !argv[argind][2] ) { ++argind; break; } // we found "--" + else if( !parse_long_option( opt, arg, options, argind ) ) break; + } + else if( !parse_short_option( opt, arg, options, argind ) ) break; + } + else + { + if( in_order ) data.push_back( Record( argv[argind++] ) ); + else non_options.push_back( argv[argind++] ); + } + } + if( !error_.empty() ) data.clear(); + else + { + for( unsigned i = 0; i < non_options.size(); ++i ) + data.push_back( Record( non_options[i] ) ); + while( argind < argc ) + data.push_back( Record( argv[argind++] ) ); + } + } + + +Arg_parser::Arg_parser( const char * const opt, const char * const arg, + const Option options[] ) + { + if( !opt || !opt[0] || !options ) return; + + if( opt[0] == '-' && opt[1] ) // we found an option + { + int argind = 1; // dummy + if( opt[1] == '-' ) + { if( opt[2] ) parse_long_option( opt, arg, options, argind ); } + else + parse_short_option( opt, arg, options, argind ); + if( 
!error_.empty() ) data.clear(); + } + else data.push_back( Record( opt ) ); + } diff --git a/arg_parser.h b/arg_parser.h new file mode 100644 index 0000000..5629b90 --- /dev/null +++ b/arg_parser.h @@ -0,0 +1,99 @@ +/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) + Copyright (C) 2006-2021 Antonio Diaz Diaz. + + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +/* Arg_parser reads the arguments in 'argv' and creates a number of + option codes, option arguments, and non-option arguments. + + In case of error, 'error' returns a non-empty error message. + + 'options' is an array of 'struct Option' terminated by an element + containing a code which is zero. A null name means a short-only + option. A code value outside the unsigned char range means a + long-only option. + + Arg_parser normally makes it appear as if all the option arguments + were specified before all the non-option arguments for the purposes + of parsing, even if the user of your program intermixed option and + non-option arguments. If you want the arguments in the exact order + the user typed them, call 'Arg_parser' with 'in_order' = true. + + The argument '--' terminates all options; any following arguments are + treated as non-option arguments, even if they begin with a hyphen. 
+ + The syntax for optional option arguments is '-<short_option><argument>' + (without whitespace), or '--<long_option>=<argument>'. +*/ + +class Arg_parser + { +public: + enum Has_arg { no, yes, maybe }; + + struct Option + { + int code; // Short option letter or code ( code != 0 ) + const char * name; // Long option name (maybe null) + Has_arg has_arg; + }; + +private: + struct Record + { + int code; + std::string argument; + explicit Record( const int c ) : code( c ) {} + explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {} + }; + + const std::string empty_arg; + std::string error_; + std::vector< Record > data; + + bool parse_long_option( const char * const opt, const char * const arg, + const Option options[], int & argind ); + bool parse_short_option( const char * const opt, const char * const arg, + const Option options[], int & argind ); + +public: + Arg_parser( const int argc, const char * const argv[], + const Option options[], const bool in_order = false ); + + // Restricted constructor. Parses a single token and argument (if any). + Arg_parser( const char * const opt, const char * const arg, + const Option options[] ); + + const std::string & error() const { return error_; } + + // The number of arguments parsed. May be different from argc. + int arguments() const { return data.size(); } + + /* If code( i ) is 0, argument( i ) is a non-option. + Else argument( i ) is the option's argument (or empty). */ + int code( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].code; + else return 0; + } + + const std::string & argument( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].argument; + else return empty_arg; + } + }; diff --git a/configure b/configure new file mode 100755 index 0000000..48c7f81 --- /dev/null +++ b/configure @@ -0,0 +1,207 @@ +#! /bin/sh +# configure script for Zutils - Utilities dealing with compressed files +# Copyright (C) 2009-2021 Antonio Diaz Diaz. 
+# +# This configure script is free software: you have unlimited permission +# to copy, distribute, and modify it. + +pkgname=zutils +pkgversion=1.10 +srctrigger=doc/${pkgname}.texi + +# clear some things potentially inherited from environment. +LC_ALL=C +export LC_ALL +srcdir= +prefix=/usr/local +exec_prefix='$(prefix)' +bindir='$(exec_prefix)/bin' +datarootdir='$(prefix)/share' +infodir='$(datarootdir)/info' +mandir='$(datarootdir)/man' +sysconfdir='$(prefix)/etc' +CXX=g++ +CPPFLAGS= +CXXFLAGS='-Wall -W -O2' +LDFLAGS= +DIFF=diff +GREP=grep + +# checking whether we are using GNU C++. +/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; } + +# Loop over all args +args= +no_create= +while [ $# != 0 ] ; do + + # Get the first arg, and shuffle + option=$1 ; arg2=no + shift + + # Add the argument quoted to args + if [ -z "${args}" ] ; then args="\"${option}\"" + else args="${args} \"${option}\"" ; fi + + # Split out the argument for options that take them + case ${option} in + *=*) optarg=`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'` ;; + esac + + # Process the options + case ${option} in + --help | -h) + echo "Usage: $0 [OPTION]... [VAR=VALUE]..." + echo + echo "To assign makefile variables (e.g., CXX, CXXFLAGS...), specify them as" + echo "arguments to configure in the form VAR=VALUE." + echo + echo "Options and variables: [defaults in brackets]" + echo " -h, --help display this help and exit" + echo " -V, --version output version information and exit" + echo " --srcdir=DIR find the sources in DIR [. 
or ..]" + echo " --prefix=DIR install into DIR [${prefix}]" + echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]" + echo " --bindir=DIR user executables directory [${bindir}]" + echo " --datarootdir=DIR base directory for doc and data [${datarootdir}]" + echo " --infodir=DIR info files directory [${infodir}]" + echo " --mandir=DIR man pages directory [${mandir}]" + echo " --sysconfdir=DIR read-only single-machine data directory [${sysconfdir}]" + echo " CXX=COMPILER C++ compiler to use [${CXX}]" + echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" + echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]" + echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS" + echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" + echo " DIFF=NAME diff program to use with zdiff [${DIFF}]" + echo " GREP=NAME grep program to use with zgrep [${GREP}]" + echo + exit 0 ;; + --version | -V) + echo "Configure script for ${pkgname} version ${pkgversion}" + exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + --sysconfdir) sysconfdir=$1 ; arg2=yes ;; + + --srcdir=*) srcdir=${optarg} ;; + --prefix=*) prefix=${optarg} ;; + --exec-prefix=*) exec_prefix=${optarg} ;; + --bindir=*) bindir=${optarg} ;; + --datarootdir=*) datarootdir=${optarg} ;; + --infodir=*) infodir=${optarg} ;; + --mandir=*) mandir=${optarg} ;; + --sysconfdir=*) sysconfdir=${optarg} ;; + --no-create) no_create=yes ;; + + CXX=*) CXX=${optarg} ;; + CPPFLAGS=*) CPPFLAGS=${optarg} ;; + CXXFLAGS=*) CXXFLAGS=${optarg} ;; + CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;; + LDFLAGS=*) LDFLAGS=${optarg} ;; + DIFF=*) DIFF=${optarg} ;; + GREP=*) GREP=${optarg} ;; + + --*) + echo 
"configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; + *=* | *-*-*) ;; + *) + echo "configure: unrecognized option: '${option}'" 1>&2 + echo "Try 'configure --help' for more information." 1>&2 + exit 1 ;; + esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to '${option}'" 1>&2 + exit 1 + fi + fi +done + +# Find the source files, if location was not specified. +srcdirtext= +if [ -z "${srcdir}" ] ; then + srcdirtext="or . or .." ; srcdir=. + if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi + if [ ! -r "${srcdir}/${srctrigger}" ] ; then + ## the sed command below emulates the dirname command + srcdir=`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + fi +fi + +if [ ! -r "${srcdir}/${srctrigger}" ] ; then + echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: (At least ${srctrigger} is missing)." 1>&2 + exit 1 +fi + +# Set srcdir to . if that's what it is. +if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi + +echo +if [ -z "${no_create}" ] ; then + echo "creating config.status" + rm -f config.status + cat > config.status << EOF +#! /bin/sh +# This file was generated automatically by configure. Don't edit. +# Run this file to recreate the current configuration. +# +# This script is free software: you have unlimited permission +# to copy, distribute, and modify it. 
+ +exec /bin/sh $0 ${args} --no-create +EOF + chmod +x config.status +fi + +echo "creating Makefile" +echo "VPATH = ${srcdir}" +echo "prefix = ${prefix}" +echo "exec_prefix = ${exec_prefix}" +echo "bindir = ${bindir}" +echo "datarootdir = ${datarootdir}" +echo "infodir = ${infodir}" +echo "mandir = ${mandir}" +echo "sysconfdir = ${sysconfdir}" +echo "CXX = ${CXX}" +echo "CPPFLAGS = ${CPPFLAGS}" +echo "CXXFLAGS = ${CXXFLAGS}" +echo "LDFLAGS = ${LDFLAGS}" +echo "DIFF = ${DIFF}" +echo "GREP = ${GREP}" +rm -f Makefile +cat > Makefile << EOF +# Makefile for Zutils - Utilities dealing with compressed files +# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# This file was generated automatically by configure. Don't edit. +# +# This Makefile is free software: you have unlimited permission +# to copy, distribute, and modify it. + +pkgname = ${pkgname} +pkgversion = ${pkgversion} +VPATH = ${srcdir} +prefix = ${prefix} +exec_prefix = ${exec_prefix} +bindir = ${bindir} +datarootdir = ${datarootdir} +infodir = ${infodir} +mandir = ${mandir} +sysconfdir = ${sysconfdir} +CXX = ${CXX} +CPPFLAGS = ${CPPFLAGS} +CXXFLAGS = ${CXXFLAGS} +LDFLAGS = ${LDFLAGS} +DIFF = ${DIFF} +GREP = ${GREP} +EOF +cat "${srcdir}/Makefile.in" >> Makefile + +echo "OK. Now you can run make." diff --git a/doc/zcat.1 b/doc/zcat.1 new file mode 100644 index 0000000..fbaf821 --- /dev/null +++ b/doc/zcat.1 @@ -0,0 +1,103 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH ZCAT "1" "January 2021" "zutils 1.10" "User Commands" +.SH NAME +zcat \- decompress and concatenate files to standard output +.SH SYNOPSIS +.B zcat +[\fI\,options\/\fR] [\fI\,files\/\fR] +.SH DESCRIPTION +zcat copies each file argument to standard output in sequence. If any +file given is compressed, its decompressed content is copied. If a file +given does not exist, and its name does not end with one of the known +extensions, zcat tries the compressed file names corresponding to the +formats supported. 
If a file fails to decompress, zcat continues copying the +rest of the files. +.PP +If a file is specified as '\-', data are read from standard input, +decompressed if needed, and sent to standard output. Data read from +standard input must be of the same type; all uncompressed or all in the +same compressed format. +.PP +If no files are specified, recursive searches examine the current +working directory, and nonrecursive searches read standard input. +.PP +The formats supported are bzip2, gzip, lzip, and xz. +.PP +Exit status is 0 if no errors occurred, 1 otherwise. +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-A\fR, \fB\-\-show\-all\fR +equivalent to '\-vET' +.TP +\fB\-b\fR, \fB\-\-number\-nonblank\fR +number nonblank output lines +.TP +\fB\-e\fR +equivalent to '\-vE' +.TP +\fB\-E\fR, \fB\-\-show\-ends\fR +display '$' at end of each line +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-n\fR, \fB\-\-number\fR +number all output lines +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-O\fR, \fB\-\-force\-format=\fR<fmt> +force the format given (bz2, gz, lz, xz) +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-r\fR, \fB\-\-recursive\fR +operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +recursively follow symbolic links +.TP +\fB\-s\fR, \fB\-\-squeeze\-blank\fR +never more than one single blank line +.TP +\fB\-t\fR +equivalent to '\-vT' +.TP +\fB\-T\fR, \fB\-\-show\-tabs\fR +display TAB characters as '^I' +.TP +\fB\-v\fR, \fB\-\-show\-nonprinting\fR +use '^' and 'M\-' notation, except for LF and TAB +.TP +\fB\-\-verbose\fR +verbose mode (show error messages) +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP 
+\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2021 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/doc/zcmp.1 b/doc/zcmp.1 new file mode 100644 index 0000000..5ae5231 --- /dev/null +++ b/doc/zcmp.1 @@ -0,0 +1,90 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH ZCMP "1" "January 2021" "zutils 1.10" "User Commands" +.SH NAME +zcmp \- decompress and compare two files byte by byte +.SH SYNOPSIS +.B zcmp +[\fI\,options\/\fR] \fI\,file1 \/\fR[\fI\,file2\/\fR] +.SH DESCRIPTION +zcmp compares two files and, if they differ, writes to standard output the +first byte and line number where they differ. Bytes and lines are numbered +starting with 1. A hyphen '\-' used as a file argument means standard input. +If any file given is compressed, its decompressed content is used. Compressed +files are decompressed on the fly; no temporary files are created. +.PP +The formats supported are bzip2, gzip, lzip, and xz. +.PP +zcmp compares file1 to file2. The standard input is used only if file1 or +file2 refers to standard input. If file2 is omitted zcmp tries the +following: +.IP +\- If file1 is compressed, compares its decompressed contents with +the corresponding uncompressed file (the name of file1 with the +extension removed). +.IP +\- If file1 is uncompressed, compares it with the decompressed +contents of file1.[lz|bz2|gz|xz] (the first one that is found). +.PP +Exit status is 0 if inputs are identical, 1 if different, 2 if trouble. 
+.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-b\fR, \fB\-\-print\-bytes\fR +print differing bytes +.TP +\fB\-i\fR, \fB\-\-ignore\-initial=\fR<n>[:<n2>] +ignore differences in the first <n> bytes +.TP +\fB\-l\fR, \fB\-\-list\fR +list position, value of all differing bytes +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-n\fR, \fB\-\-bytes=\fR<n> +compare at most <n> bytes +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-O\fR, \fB\-\-force\-format\fR=\fI\,[\/\fR<f1>][,<f2>] +force the formats given (bz2, gz, lz, xz) +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-s\fR, \fB\-\-silent\fR +(same as \fB\-\-quiet\fR) +.TP +\fB\-v\fR, \fB\-\-verbose\fR +verbose mode (same as \fB\-\-list\fR) +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.PP +Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, +Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2021 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/doc/zdiff.1 b/doc/zdiff.1 new file mode 100644 index 0000000..65a34b7 --- /dev/null +++ b/doc/zdiff.1 @@ -0,0 +1,121 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. 
+.TH ZDIFF "1" "January 2021" "zutils 1.10" "User Commands" +.SH NAME +zdiff \- decompress and compare two files line by line +.SH SYNOPSIS +.B zdiff +[\fI\,options\/\fR] \fI\,file1 \/\fR[\fI\,file2\/\fR] +.SH DESCRIPTION +zdiff compares two files and, if they differ, writes to standard output the +differences line by line. A hyphen '\-' used as a file argument means standard +input. If any file given is compressed, its decompressed content is used. +zdiff is a front end to the program diff and has the limitation that messages +from diff refer to temporary file names instead of those specified. +.PP +The formats supported are bzip2, gzip, lzip, and xz. +.PP +zdiff compares file1 to file2. The standard input is used only if file1 or +file2 refers to standard input. If file2 is omitted zdiff tries the +following: +.IP +\- If file1 is compressed, compares its decompressed contents with +the corresponding uncompressed file (the name of file1 with the +extension removed). +.IP +\- If file1 is uncompressed, compares it with the decompressed +contents of file1.[lz|bz2|gz|xz] (the first one that is found). +.PP +Exit status is 0 if inputs are identical, 1 if different, 2 if trouble. +Some options only work if the diff program used supports them. 
+.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-a\fR, \fB\-\-text\fR +treat all files as text +.TP +\fB\-b\fR, \fB\-\-ignore\-space\-change\fR +ignore changes in the amount of white space +.TP +\fB\-B\fR, \fB\-\-ignore\-blank\-lines\fR +ignore changes whose lines are all blank +.TP +\fB\-c\fR +use the context output format +.TP +\fB\-C\fR, \fB\-\-context=\fR<n> +same as \fB\-c\fR but use <n> lines of context +.TP +\fB\-d\fR, \fB\-\-minimal\fR +try hard to find a smaller set of changes +.TP +\fB\-E\fR, \fB\-\-ignore\-tab\-expansion\fR +ignore changes due to tab expansion +.TP +\fB\-i\fR, \fB\-\-ignore\-case\fR +ignore case differences in file contents +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-O\fR, \fB\-\-force\-format\fR=\fI\,[\/\fR<f1>][,<f2>] +force the formats given (bz2, gz, lz, xz) +.TP +\fB\-p\fR, \fB\-\-show\-c\-function\fR +show which C function each change is in +.TP +\fB\-q\fR, \fB\-\-brief\fR +output only whether files differ +.TP +\fB\-s\fR, \fB\-\-report\-identical\-files\fR +report when two files are identical +.TP +\fB\-t\fR, \fB\-\-expand\-tabs\fR +expand tabs to spaces in output +.TP +\fB\-T\fR, \fB\-\-initial\-tab\fR +make tabs line up by prepending a tab +.TP +\fB\-u\fR +use the unified output format +.TP +\fB\-U\fR, \fB\-\-unified=\fR<n> +same as \fB\-u\fR but use <n> lines of context +.TP +\fB\-w\fR, \fB\-\-ignore\-all\-space\fR +ignore all white space +.TP +\fB\-W\fR, \fB\-\-width=\fR<n> +output at most <n> print columns +.TP +\fB\-y\fR, \fB\-\-side\-by\-side\fR +output in two columns +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP 
+\fB\-\-xz=\fR<command> +set compressor and options for xz format +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2021 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/doc/zgrep.1 b/doc/zgrep.1 new file mode 100644 index 0000000..69ed0cd --- /dev/null +++ b/doc/zgrep.1 @@ -0,0 +1,153 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH ZGREP "1" "January 2021" "zutils 1.10" "User Commands" +.SH NAME +zgrep \- search compressed files for a regular expression +.SH SYNOPSIS +.B zgrep +[\fI\,options\/\fR] \fI\,<pattern> \/\fR[\fI\,files\/\fR] +.SH DESCRIPTION +zgrep is a front end to the program grep that allows transparent search +on any combination of compressed and uncompressed files. If any file +given is compressed, its decompressed content is used. If a file given +does not exist, and its name does not end with one of the known +extensions, zgrep tries the compressed file names corresponding to the +formats supported. If a file fails to decompress, zgrep continues +searching the rest of the files. +.PP +If a file is specified as '\-', data are read from standard input, +decompressed if needed, and fed to grep. Data read from standard input +must be of the same type; all uncompressed or all in the same +compressed format. +.PP +If no files are specified, recursive searches examine the current +working directory, and nonrecursive searches read standard input. +.PP +The formats supported are bzip2, gzip, lzip, and xz. +.PP +Exit status is 0 if match, 1 if no match, 2 if trouble. +Some options only work if the grep program used supports them. 
+.SH OPTIONS +.TP +\fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-a\fR, \fB\-\-text\fR +treat all files as text +.TP +\fB\-A\fR, \fB\-\-after\-context=\fR<n> +print <n> lines of trailing context +.TP +\fB\-b\fR, \fB\-\-byte\-offset\fR +print the byte offset of each line +.TP +\fB\-B\fR, \fB\-\-before\-context=\fR<n> +print <n> lines of leading context +.TP +\fB\-c\fR, \fB\-\-count\fR +only print a count of matching lines per file +.TP +\fB\-C\fR, \fB\-\-context=\fR<n> +print <n> lines of output context +.TP +\fB\-\-color[=\fR<when>] +show matched strings in color +.TP +\fB\-e\fR, \fB\-\-regexp=\fR<pattern> +use <pattern> as the pattern to match +.TP +\fB\-E\fR, \fB\-\-extended\-regexp\fR +<pattern> is an extended regular expression +.TP +\fB\-f\fR, \fB\-\-file=\fR<file> +obtain patterns from <file> +.TP +\fB\-F\fR, \fB\-\-fixed\-strings\fR +<pattern> is a set of newline\-separated strings +.TP +\fB\-h\fR, \fB\-\-no\-filename\fR +suppress the prefixing filename on output +.TP +\fB\-H\fR, \fB\-\-with\-filename\fR +print the filename for each match +.TP +\fB\-i\fR, \fB\-\-ignore\-case\fR +ignore case distinctions +.TP +\fB\-I\fR +ignore binary files +.TP +\fB\-l\fR, \fB\-\-files\-with\-matches\fR +only print names of files containing matches +.TP +\fB\-L\fR, \fB\-\-files\-without\-match\fR +only print names of files containing no matches +.TP +\fB\-m\fR, \fB\-\-max\-count=\fR<n> +stop after <n> matches +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-n\fR, \fB\-\-line\-number\fR +print the line number of each line +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-o\fR, \fB\-\-only\-matching\fR +show only the part of a line matching <pattern> +.TP +\fB\-O\fR, \fB\-\-force\-format=\fR<fmt> +force the format given (bz2, gz, lz, xz) +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-r\fR, 
\fB\-\-recursive\fR +operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +recursively follow symbolic links +.TP +\fB\-s\fR, \fB\-\-no\-messages\fR +suppress error messages +.TP +\fB\-v\fR, \fB\-\-invert\-match\fR +select non\-matching lines +.TP +\fB\-\-verbose\fR +verbose mode (show error messages) +.TP +\fB\-w\fR, \fB\-\-word\-regexp\fR +match only whole words +.TP +\fB\-x\fR, \fB\-\-line\-regexp\fR +match only whole lines +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.PP +Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, +Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc... +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2021 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/doc/ztest.1 b/doc/ztest.1 new file mode 100644 index 0000000..45cda27 --- /dev/null +++ b/doc/ztest.1 @@ -0,0 +1,82 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH ZTEST "1" "January 2021" "zutils 1.10" "User Commands" +.SH NAME +ztest \- verify the integrity of compressed files +.SH SYNOPSIS +.B ztest +[\fI\,options\/\fR] [\fI\,files\/\fR] +.SH DESCRIPTION +ztest verifies the integrity of the compressed files specified. +Uncompressed files are ignored. If a file is specified as '\-', the +integrity of compressed data read from standard input is verified. Data +read from standard input must be all in the same compressed format. 
If +a file fails to decompress, does not exist, can't be opened, or is a +terminal, ztest continues verifying the rest of the files. A final +diagnostic is shown at verbosity level 1 or higher if any file fails the +test when testing multiple files. +.PP +If no files are specified, recursive searches examine the current +working directory, and nonrecursive searches read standard input. +.PP +The formats supported are bzip2, gzip, lzip, and xz. +.PP +Note that error detection in the xz format is broken. First, some xz +files lack integrity information. Second, not all xz decompressors can +verify the integrity of all xz files. Third, section 2.1.1.2 'Stream +Flags' of the xz format specification allows xz decompressors to produce +garbage output without issuing any warning. Therefore, xz files can't +always be verified as reliably as files in the other formats can. +.PP +Exit status is 0 if all compressed files verify OK, 1 if environmental +problems (file not found, invalid flags, I/O errors, etc.), 2 if any +compressed file is corrupt or invalid. 
+.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-O\fR, \fB\-\-force\-format=\fR<fmt> +force the format given (bz2, gz, lz, xz) +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-r\fR, \fB\-\-recursive\fR +operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +recursively follow symbolic links +.TP +\fB\-v\fR, \fB\-\-verbose\fR +be verbose (a 2nd \fB\-v\fR gives more) +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2021 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/doc/zupdate.1 b/doc/zupdate.1 new file mode 100644 index 0000000..dcd3d24 --- /dev/null +++ b/doc/zupdate.1 @@ -0,0 +1,91 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH ZUPDATE "1" "January 2021" "zutils 1.10" "User Commands" +.SH NAME +zupdate \- recompress bzip2, gzip, xz files to lzip format +.SH SYNOPSIS +.B zupdate +[\fI\,options\/\fR] [\fI\,files\/\fR] +.SH DESCRIPTION +zupdate recompresses files from bzip2, gzip, and xz formats to lzip +format. Each original is compared with the new file and then deleted. 
+Only regular files with standard file name extensions are recompressed; +other files are ignored. Compressed files are decompressed and then +recompressed on the fly; no temporary files are created. The lzip format +is chosen as destination because it is the most appropriate for +long\-term data archiving. +.PP +If no files are specified, recursive searches examine the current +working directory, and nonrecursive searches do nothing. +.PP +If the lzip compressed version of a file already exists, the file is +skipped unless the option '\-\-force' is given. In this case, if the +comparison with the existing lzip version fails, an error is returned +and the original file is not deleted. The operation of zupdate is meant +to be safe and not cause any data loss. Therefore, existing lzip +compressed files are never overwritten nor deleted. +.PP +The names of the original files must have one of the following extensions: +\&'.bz2', '.gz', or '.xz', which are recompressed to '.lz'; +\&'.tbz', '.tbz2', '.tgz', or '.txz', which are recompressed to '.tlz'. +.PP +Exit status is 0 if all the compressed files were successfully recompressed +(if needed), compared, and deleted (if requested). Non\-zero otherwise. 
+.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-f\fR, \fB\-\-force\fR +don't skip a file even if the .lz exists +.TP +\fB\-k\fR, \fB\-\-keep\fR +keep (don't delete) input files +.TP +\fB\-l\fR, \fB\-\-lzip\-verbose\fR +pass one option \fB\-v\fR to the lzip compressor +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-r\fR, \fB\-\-recursive\fR +operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +recursively follow symbolic links +.TP +\fB\-v\fR, \fB\-\-verbose\fR +be verbose (a 2nd \fB\-v\fR gives more) +.TP +\fB\-0\fR .. \fB\-9\fR +set compression level [default 9] +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2021 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. diff --git a/doc/zutils.info b/doc/zutils.info new file mode 100644 index 0000000..854100f --- /dev/null +++ b/doc/zutils.info @@ -0,0 +1,838 @@ +This is zutils.info, produced by makeinfo version 4.13+ from zutils.texi. + +INFO-DIR-SECTION Data Compression +START-INFO-DIR-ENTRY +* Zutils: (zutils). 
Utilities dealing with compressed files +END-INFO-DIR-ENTRY + + +File: zutils.info, Node: Top, Next: Introduction, Up: (dir) + +Zutils Manual +************* + +This manual is for Zutils (version 1.10, 5 January 2021). + +* Menu: + +* Introduction:: Purpose and features of zutils +* Common options:: Options common to all utilities +* The zutilsrc file:: The zutils configuration file +* Zcat:: Concatenating compressed files +* Zcmp:: Comparing compressed files byte by byte +* Zdiff:: Comparing compressed files line by line +* Zgrep:: Searching inside compressed files +* Ztest:: Testing the integrity of compressed files +* Zupdate:: Recompressing files to lzip format +* Problems:: Reporting bugs +* Concept index:: Index of concepts + + + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This manual is free documentation: you have unlimited permission to copy, +distribute, and modify it. + + +File: zutils.info, Node: Introduction, Next: Common options, Prev: Top, Up: Top + +1 Introduction +************** + +Zutils is a collection of utilities able to process any combination of +compressed and uncompressed files transparently. If any file given, +including standard input, is compressed, its decompressed content is used. +Compressed files are decompressed on the fly; no temporary files are +created. + + These utilities are not wrapper scripts but safer and more efficient C++ +programs. In particular the option '--recursive' is very efficient in those +utilities supporting it. + +The utilities provided are zcat, zcmp, zdiff, zgrep, ztest, and zupdate. +The formats supported are bzip2, gzip, lzip, and xz. +Zutils uses external compressors. The compressor to be used for each format +is configurable at runtime. + + zcat, zcmp, zdiff, and zgrep are improved replacements for the shell +scripts provided by GNU gzip. ztest is unique to zutils. zupdate is similar +to gzip's znew. 
+ + NOTE: Bzip2 and lzip provide well-defined values of exit status, which +makes them safe to use with zutils. Gzip and xz may return ambiguous warning +values, making them less reliable back ends for zutils. *Note +compressor-requirements::. + + FORMAT NOTE 1: The option '--format' allows the processing of a subset +of formats in recursive mode and when trying compressed file names: +'zgrep foo -r --format=bz2,lz somedir somefile.tar'. + + FORMAT NOTE 2: If the option '--force-format' is given, the files are +passed to the corresponding decompressor without verifying their format, +allowing for example the processing of compress'd (.Z) files with gzip: +'zcmp --force-format=gz file.Z file.lz'. + + LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never +have been compressed. Decompressed is used to refer to data which have +undergone the process of decompression. + + + Numbers given as arguments to options (positions, sizes) may be followed +by a multiplier and an optional 'B' for "byte". + + Table of SI and binary prefixes (unit multipliers): + +Prefix Value | Prefix Value +k kilobyte (10^3 = 1000) | Ki kibibyte (2^10 = 1024) +M megabyte (10^6) | Mi mebibyte (2^20) +G gigabyte (10^9) | Gi gibibyte (2^30) +T terabyte (10^12) | Ti tebibyte (2^40) +P petabyte (10^15) | Pi pebibyte (2^50) +E exabyte (10^18) | Ei exbibyte (2^60) +Z zettabyte (10^21) | Zi zebibyte (2^70) +Y yottabyte (10^24) | Yi yobibyte (2^80) + + +File: zutils.info, Node: Common options, Next: The zutilsrc file, Prev: Introduction, Up: Top + +2 Common options +**************** + +The following options are available in all the utilities. Rather than +writing identical descriptions for each of the programs, they are described +here. *Note Argument syntax: (arg_parser)Argument syntax. + +'-h' +'--help' + Print an informative help message describing the options and exit. + zgrep only supports the '--help' form of this option. 
+ +'-V' +'--version' + Print the version number on the standard output and exit. This version + number should be included in all bug reports. + +'-M FORMAT_LIST' +'--format=FORMAT_LIST' + Process only the formats listed in the comma-separated FORMAT_LIST. + Valid formats are 'bz2', 'gz', 'lz', 'xz', and 'un' for + 'uncompressed', meaning "any file name without a known extension". + This option excludes files based on extension, instead of format, + because it is more efficient. The exclusion only applies to names + generated automatically (for example when adding extensions to a file + name or when operating recursively on directories). Files given in the + command line are always processed. + + Each format in FORMAT_LIST enables file names with the following + extensions: + + bz2 enables .bz2 .tbz .tbz2 + gz enables .gz .tgz + lz enables .lz .tlz + xz enables .xz .txz + un enables any other file name + +'-N' +'--no-rcfile' + Don't read the runtime configuration file 'zutilsrc'. + +'--bz2=COMMAND' +'--gz=COMMAND' +'--lz=COMMAND' +'--xz=COMMAND' + Set program to be used as (de)compressor for the corresponding format. + COMMAND may include arguments. For example '--lz='plzip --threads=2''. + The program set with '--lz' is used for both compression and + decompression. The other three are used only for decompression. The + name of the program can't begin with '-'. These options override the + values set in 'zutilsrc'. The compression program used must meet three + requirements: + + 1. When called with the option '-d', it must read compressed data + from the standard input and produce decompressed data on the + standard output. + + 2. If the option '-q' is passed to zutils, the compression program + must also accept it. + + 3. It must return 0 if no errors occurred, and a non-zero value + otherwise. 
+ + + +File: zutils.info, Node: The zutilsrc file, Next: Zcat, Prev: Common options, Up: Top + +3 The zutils configuration file 'zutilsrc' +****************************************** + +'zutilsrc' is the runtime configuration file for zutils. In it you may +define the compressor name and options to be used for each format. +'zutilsrc' is optional; you don't need to install it in order to run zutils. + + The compressors specified in the command line override those specified +in 'zutilsrc'. + + You may copy the system 'zutilsrc' file '${sysconfdir}/zutilsrc' to +'$HOME/.zutilsrc' and customize these options as you like. The file syntax +is fairly obvious (and there are further instructions in it): + + 1. Any line beginning with '#' is a comment line. + + 2. Each non-comment line defines the command to be used for the + corresponding format, with the syntax: + <format> = <compressor> [options] + where <format> is one of 'bz2', 'gz', 'lz', or 'xz'. + + +File: zutils.info, Node: Zcat, Next: Zcmp, Prev: The zutilsrc file, Up: Top + +4 Zcat +****** + +zcat copies each FILE argument to standard output in sequence. If any file +given is compressed, its decompressed content is copied. If a file given +does not exist, and its name does not end with one of the known extensions, +zcat tries the compressed file names corresponding to the formats +supported. If a file fails to decompress, zcat continues copying the rest +of the files. + + If a file is specified as '-', data are read from standard input, +decompressed if needed, and sent to standard output. Data read from +standard input must be of the same type; all uncompressed or all in the +same compressed format. + + If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + + The format for running zcat is: + + zcat [OPTIONS] [FILES] + +Exit status is 0 if no errors occurred, 1 otherwise. 
+ + zcat supports the following options: + +'-A' +'--show-all' + Equivalent to '-vET'. + +'-b' +'--number-nonblank' + Number all nonblank output lines, starting with 1. The line count is + unlimited. + +'-e' + Equivalent to '-vE'. + +'-E' +'--show-ends' + Print a '$' after the end of each line. + +'-n' +'--number' + Number all output lines, starting with 1. The line count is unlimited. + +'-O FORMAT' +'--force-format=FORMAT' + Force the compressed format given. Valid values for FORMAT are 'bz2', + 'gz', 'lz', and 'xz'. If this option is used, the files are passed to + the corresponding decompressor without verifying their format, and the + exact file name must be given. Other names won't be tried. + +'-q' +'--quiet' + Quiet operation. Suppress all messages. + +'-r' +'--recursive' + For each directory operand, read and process all files in that + directory, recursively. Follow symbolic links given in the command + line, but skip symbolic links that are encountered recursively. + +'-R' +'--dereference-recursive' + For each directory operand, read and process all files in that + directory, recursively, following all symbolic links. + +'-s' +'--squeeze-blank' + Replace multiple adjacent blank lines with a single blank line. + +'-t' + Equivalent to '-vT'. + +'-T' +'--show-tabs' + Print TAB characters as '^I'. + +'-v' +'--show-nonprinting' + Print control characters except for LF (newline) and TAB using '^' + notation and precede characters larger than 127 with 'M-' (which + stands for "meta"). + +'--verbose' + Verbose mode. Show error messages. + + + +File: zutils.info, Node: Zcmp, Next: Zdiff, Prev: Zcat, Up: Top + +5 Zcmp +****** + +zcmp compares two files and, if they differ, writes to standard output the +first byte and line number where they differ. Bytes and lines are numbered +starting with 1. A hyphen '-' used as a FILE argument means standard input. +If any file given is compressed, its decompressed content is used. 
+Compressed files are decompressed on the fly; no temporary files are +created. + + The format for running zcmp is: + + zcmp [OPTIONS] FILE1 [FILE2] + +This compares FILE1 to FILE2. The standard input is used only if FILE1 or +FILE2 refers to standard input. If FILE2 is omitted zcmp tries the +following: + + - If FILE1 is compressed, compares its decompressed contents with the + corresponding uncompressed file (the name of FILE1 with the extension + removed). + + - If FILE1 is uncompressed, compares it with the decompressed contents + of FILE1.[lz|bz2|gz|xz] (the first one that is found). + +An exit status of 0 means no differences were found, 1 means some +differences were found, and 2 means trouble. + + zcmp supports the following options: + +'-b' +'--print-bytes' + Print the differing bytes. Print control bytes as a '^' followed by a + letter, and precede bytes larger than 127 with 'M-' (which stands for + "meta"). + +'-i SIZE' +'--ignore-initial=SIZE' + Ignore any differences in the first SIZE bytes of the input files. + Treat files with fewer than SIZE bytes as if they were empty. If SIZE + is in the form 'SIZE1:SIZE2', ignore the first SIZE1 bytes of the + first input file and the first SIZE2 bytes of the second input file. + +'-l' +'-v' +'--list' +'--verbose' + Print the byte numbers (in decimal) and values (in octal) of all + differing bytes. + +'-n COUNT' +'--bytes=COUNT' + Compare at most COUNT input bytes. + +'-O [FORMAT1][,FORMAT2]' +'--force-format=[FORMAT1][,FORMAT2]' + Force the compressed formats given. Any of FORMAT1 or FORMAT2 may be + omitted and the corresponding format will be automatically detected. + Valid values for FORMAT are 'bz2', 'gz', 'lz', and 'xz'. If at least + one format is specified with this option, the file is passed to the + corresponding decompressor without verifying its format, and the exact + file names of both FILE1 and FILE2 must be given. Other names won't be + tried. 
+ +'-q' +'-s' +'--quiet' +'--silent' + Don't print anything; only return an exit status indicating whether the + files differ. + + + +File: zutils.info, Node: Zdiff, Next: Zgrep, Prev: Zcmp, Up: Top + +6 Zdiff +******* + +zdiff compares two files and, if they differ, writes to standard output the +differences line by line. A hyphen '-' used as a FILE argument means +standard input. If any file given is compressed, its decompressed content +is used. zdiff is a front end to the program diff and has the limitation +that messages from diff refer to temporary file names instead of those +specified. + + The format for running zdiff is: + + zdiff [OPTIONS] FILE1 [FILE2] + +This compares FILE1 to FILE2. The standard input is used only if FILE1 or +FILE2 refers to standard input. If FILE2 is omitted zdiff tries the +following: + + - If FILE1 is compressed, compares its decompressed contents with the + corresponding uncompressed file (the name of FILE1 with the extension + removed). + + - If FILE1 is uncompressed, compares it with the decompressed contents + of FILE1.[lz|bz2|gz|xz] (the first one that is found). + +An exit status of 0 means no differences were found, 1 means some +differences were found, and 2 means trouble. + + zdiff supports the following options (some options only work if the diff +program used supports them): + +'-a' +'--text' + Treat all files as text. + +'-b' +'--ignore-space-change' + Ignore changes in the amount of white space. + +'-B' +'--ignore-blank-lines' + Ignore changes whose lines are all blank. + +'-c' + Use the context output format. + +'-C N' +'--context=N' + Same as -c but use N lines of context. + +'-d' +'--minimal' + Try hard to find a smaller set of changes. + +'-E' +'--ignore-tab-expansion' + Ignore changes due to tab expansion. + +'-i' +'--ignore-case' + Ignore case differences in file contents. + +'-O [FORMAT1][,FORMAT2]' +'--force-format=[FORMAT1][,FORMAT2]' + Force the compressed formats given. 
Any of FORMAT1 or FORMAT2 may be + omitted and the corresponding format will be automatically detected. + Valid values for FORMAT are 'bz2', 'gz', 'lz', and 'xz'. If at least + one format is specified with this option, the file is passed to the + corresponding decompressor without verifying its format, and the exact + file names of both FILE1 and FILE2 must be given. Other names won't be + tried. + +'-p' +'--show-c-function' + Show which C function each change is in. + +'-q' +'--brief' + Output only whether files differ. + +'-s' +'--report-identical-files' + Report when two files are identical. + +'-t' +'--expand-tabs' + Expand tabs to spaces in output. + +'-T' +'--initial-tab' + Make tabs line up by prepending a tab. + +'-u' + Use the unified output format. + +'-U N' +'--unified=N' + Same as -u but use N lines of context. + +'-w' +'--ignore-all-space' + Ignore all white space. + + + +File: zutils.info, Node: Zgrep, Next: Ztest, Prev: Zdiff, Up: Top + +7 Zgrep +******* + +zgrep is a front end to the program grep that allows transparent search on +any combination of compressed and uncompressed files. If any file given is +compressed, its decompressed content is used. If a file given does not +exist, and its name does not end with one of the known extensions, zgrep +tries the compressed file names corresponding to the formats supported. If +a file fails to decompress, zgrep continues searching the rest of the files. + + If a file is specified as '-', data are read from standard input, +decompressed if needed, and fed to grep. Data read from standard input must +be of the same type; all uncompressed or all in the same compressed format. + + If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + + The format for running zgrep is: + + zgrep [OPTIONS] PATTERN [FILES] + +An exit status of 0 means at least one match was found, 1 means no matches +were found, and 2 means trouble. 
+ + zgrep supports the following options (some options only work if the grep +program used supports them): + +'-a' +'--text' + Treat all files as text. + +'-A N' +'--after-context=N' + Print N lines of trailing context. + +'-b' +'--byte-offset' + Print the byte offset of each line. + +'-B N' +'--before-context=N' + Print N lines of leading context. + +'-c' +'--count' + Only print a count of matching lines per file. + +'-C N' +'--context=N' + Print N lines of output context. + +'--color[=WHEN]' + Show matched strings in color. WHEN is 'never', 'always', or 'auto'. + +'-e PATTERN' +'--regexp=PATTERN' + Use PATTERN as the pattern to match. + +'-E' +'--extended-regexp' + Treat PATTERN as an extended regular expression. + +'-f FILE' +'--file=FILE' + Obtain patterns from FILE, one per line. + When searching in several files at once, command substitution can be + used with '-e' to read FILE only once, for example if FILE is not a + regular file: 'zgrep -e "$(cat FILE)" file1.lz file2.gz' + +'-F' +'--fixed-strings' + Treat PATTERN as a set of newline-separated strings. + +'-h' +'--no-filename' + Suppress the prefixing of file names on output when multiple files are + searched. + +'-H' +'--with-filename' + Print the file name for each match. + +'-i' +'--ignore-case' + Ignore case distinctions. + +'-I' + Ignore binary files. + +'-l' +'--files-with-matches' + Only print names of files containing at least one match. + +'-L' +'--files-without-match' + Only print names of files not containing any matches. + Note: option -L fails (prints wrong results, returns wrong status, and + even hangs) when using GNU grep versions 3.2 to 3.4 inclusive because + of a wrong change in the exit status of grep, which was reverted in + GNU grep 3.5. + +'-m N' +'--max-count=N' + Stop after N matches. + +'-n' +'--line-number' + Prefix each matched line with its line number in the input file. + +'-o' +'--only-matching' + Show only the part of matching lines that actually matches PATTERN. 
+ +'-O FORMAT' +'--force-format=FORMAT' + Force the compressed format given. Valid values for FORMAT are 'bz2', + 'gz', 'lz', and 'xz'. If this option is used, the files are passed to + the corresponding decompressor without verifying their format, and the + exact file name must be given. Other names won't be tried. + +'-q' +'--quiet' + Suppress all messages. Exit immediately with zero status if any match + is found, even if an error was detected. + +'-r' +'--recursive' + For each directory operand, read and process all files in that + directory, recursively. Follow symbolic links given in the command + line, but skip symbolic links that are encountered recursively. + +'-R' +'--dereference-recursive' + For each directory operand, read and process all files in that + directory, recursively, following all symbolic links. + +'-s' +'--no-messages' + Suppress error messages about nonexistent or unreadable files. + +'-v' +'--invert-match' + Select non-matching lines. + +'--verbose' + Verbose mode. Show error messages. + +'-w' +'--word-regexp' + Match only whole words. + +'-x' +'--line-regexp' + Match only whole lines. + + + +File: zutils.info, Node: Ztest, Next: Zupdate, Prev: Zgrep, Up: Top + +8 Ztest +******* + +ztest verifies the integrity of the compressed files specified. +Uncompressed files are ignored. If a file is specified as '-', the +integrity of compressed data read from standard input is verified. Data +read from standard input must be all in the same compressed format. If a +file fails to decompress, does not exist, can't be opened, or is a +terminal, ztest continues verifying the rest of the files. A final +diagnostic is shown at verbosity level 1 or higher if any file fails the +test when testing multiple files. + + If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + + Note that error detection in the xz format is broken. First, some xz +files lack integrity information. 
Second, not all xz decompressors can +verify the integrity of all xz files. Third, section 2.1.1.2 'Stream Flags' +of the xz format specification allows xz decompressors to produce garbage +output without issuing any warning. Therefore, xz files can't always be +verified as reliably as files in the other formats can. + + The format for running ztest is: + + ztest [OPTIONS] [FILES] + +The exit status is 0 if all compressed files verify OK, 1 if environmental +problems (file not found, invalid flags, I/O errors, etc), 2 if any +compressed file is corrupt or invalid. + + ztest supports the following options: + +'-O FORMAT' +'--force-format=FORMAT' + Force the compressed format given. Valid values for FORMAT are 'bz2', + 'gz', 'lz', and 'xz'. If this option is used, the files are passed to + the corresponding decompressor without verifying their format, and any + files in a format that the decompressor can't understand will fail. + For example, '--force-format=gz' can test gzipped (.gz) and compress'd + (.Z) files if the compressor used is GNU gzip. + +'-q' +'--quiet' + Quiet operation. Suppress all messages. + +'-r' +'--recursive' + For each directory operand, read and process all files in that + directory, recursively. Follow symbolic links given in the command + line, but skip symbolic links that are encountered recursively. + +'-R' +'--dereference-recursive' + For each directory operand, read and process all files in that + directory, recursively, following all symbolic links. + +'-v' +'--verbose' + Verbose mode. Show the verify status for each file processed. + Further -v's increase the verbosity level. + + + +File: zutils.info, Node: Zupdate, Next: Problems, Prev: Ztest, Up: Top + +9 Zupdate +********* + +zupdate recompresses files from bzip2, gzip, and xz formats to lzip format. +Each original is compared with the new file and then deleted. Only regular +files with standard file name extensions are recompressed, other files are +ignored. 
Compressed files are decompressed and then recompressed on the fly; +no temporary files are created. If an error happens while recompressing a +file, zupdate exits immediately without recompressing the rest of the files. +The lzip format is chosen as destination because it is the most appropriate +for long-term data archiving. + + If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches do nothing. + + If the lzip compressed version of a file already exists, the file is +skipped unless the option '--force' is given. In this case, if the +comparison with the existing lzip version fails, an error is returned and +the original file is not deleted. The operation of zupdate is meant to be +safe and not cause any data loss. Therefore, existing lzip compressed files +are never overwritten nor deleted. + + Combining the options '--force' and '--keep', as in +'zupdate -f -k *.gz', verifies that there are no differences between each +pair of files in a multiformat set of files. + + The names of the original files must have one of the following +extensions: +'.bz2', '.gz', or '.xz', which are recompressed to '.lz'; +'.tbz', '.tbz2', '.tgz', or '.txz', which are recompressed to '.tlz'. +Keeping the combined extensions ('.tgz' -> '.tlz') may be useful when +recompressing Slackware packages, for example. + + Recompressing a file is much like copying or moving it; therefore zupdate +preserves the access and modification dates, permissions, and, when +possible, ownership of the file just as 'cp -p' does. (If the user ID or +the group ID can't be duplicated, the file permission bits S_ISUID and +S_ISGID are cleared). + + The format for running zupdate is: + + zupdate [OPTIONS] [FILES] + +Exit status is 0 if all the compressed files were successfully recompressed +(if needed), compared, and deleted (if requested). Non-zero otherwise. 
+ + zupdate supports the following options: + +'-f' +'--force' + Don't skip a file for which a lzip compressed version already exists. + '--force' compares the content of the input file with the content of + the existing lzip file and deletes the input file if both contents are + identical. + +'-k' +'--keep' + Keep (don't delete) the input file after comparing it with the lzip + file. + +'-l' +'--lzip-verbose' + Pass one option '-v' to the lzip compressor so that it shows the + compression ratio for each file processed. Using lzip 1.15 or newer, a + second '-l' shows the progress of compression. Use it together with + '-v' to see the name of the file. + +'-q' +'--quiet' + Quiet operation. Suppress all messages. + +'-r' +'--recursive' + For each directory operand, read and process all files in that + directory, recursively. Follow symbolic links given in the command + line, but skip symbolic links that are encountered recursively. + +'-R' +'--dereference-recursive' + For each directory operand, read and process all files in that + directory, recursively, following all symbolic links. + +'-v' +'--verbose' + Verbose mode. Show the files being processed. A second '-v' also shows + the files being ignored. + +'-0 .. -9' + Set the compression level of lzip. By default zupdate passes '-9' to + lzip. Custom compression options can be passed to lzip with the option + '--lz'. For example '--lz='lzip -9 -s64MiB''. + + + +File: zutils.info, Node: Problems, Next: Concept index, Prev: Zupdate, Up: Top + +10 Reporting bugs +***************** + +There are probably bugs in zutils. There are certainly errors and omissions +in this manual. If you report them, they will get fixed. If you don't, no +one will ever know about them and they will remain unfixed for all +eternity, if not longer. + + If you find a bug in zutils, please send electronic mail to +<zutils-bug@nongnu.org>. Include the version number, which you can find by +running 'zupdate --version'. 
+ + +File: zutils.info, Node: Concept index, Prev: Problems, Up: Top + +Concept index +************* + + +* Menu: + +* bugs: Problems. (line 6) +* common options: Common options. (line 6) +* getting help: Problems. (line 6) +* introduction: Introduction. (line 6) +* zcat: Zcat. (line 6) +* zcmp: Zcmp. (line 6) +* zdiff: Zdiff. (line 6) +* zgrep: Zgrep. (line 6) +* ztest: Ztest. (line 6) +* zupdate: Zupdate. (line 6) +* zutilsrc: The zutilsrc file. (line 6) + + + +Tag Table: +Node: Top222 +Node: Introduction1151 +Node: Common options3776 +Ref: compressor-requirements5847 +Node: The zutilsrc file6219 +Node: Zcat7180 +Node: Zcmp9743 +Node: Zdiff12233 +Node: Zgrep14973 +Node: Ztest19218 +Node: Zupdate21725 +Node: Problems25409 +Node: Concept index25943 + +End Tag Table + + +Local Variables: +coding: iso-8859-15 +End: diff --git a/doc/zutils.texi b/doc/zutils.texi new file mode 100644 index 0000000..c494185 --- /dev/null +++ b/doc/zutils.texi @@ -0,0 +1,882 @@ +\input texinfo @c -*-texinfo-*- +@c %**start of header +@setfilename zutils.info +@documentencoding ISO-8859-15 +@settitle Zutils Manual +@finalout +@c %**end of header + +@set UPDATED 5 January 2021 +@set VERSION 1.10 + +@dircategory Data Compression +@direntry +* Zutils: (zutils). Utilities dealing with compressed files +@end direntry + + +@ifnothtml +@titlepage +@title Zutils +@subtitle Utilities dealing with compressed files +@subtitle for Zutils version @value{VERSION}, @value{UPDATED} +@author by Antonio Diaz Diaz + +@page +@vskip 0pt plus 1filll +@end titlepage + +@contents +@end ifnothtml + +@ifnottex +@node Top +@top + +This manual is for Zutils (version @value{VERSION}, @value{UPDATED}). 
+ +@menu +* Introduction:: Purpose and features of zutils +* Common options:: Options common to all utilities +* The zutilsrc file:: The zutils configuration file +* Zcat:: Concatenating compressed files +* Zcmp:: Comparing compressed files byte by byte +* Zdiff:: Comparing compressed files line by line +* Zgrep:: Searching inside compressed files +* Ztest:: Testing the integrity of compressed files +* Zupdate:: Recompressing files to lzip format +* Problems:: Reporting bugs +* Concept index:: Index of concepts +@end menu + +@sp 1 +Copyright @copyright{} 2009-2021 Antonio Diaz Diaz. + +This manual is free documentation: you have unlimited permission to copy, +distribute, and modify it. +@end ifnottex + + +@node Introduction +@chapter Introduction +@cindex introduction + +@uref{http://www.nongnu.org/zutils/zutils.html,,Zutils} +is a collection of utilities able to process any combination of +compressed and uncompressed files transparently. If any file given, +including standard input, is compressed, its decompressed content is used. +Compressed files are decompressed on the fly; no temporary files are +created. + +These utilities are not wrapper scripts but safer and more efficient C++ +programs. In particular the option @samp{--recursive} is very efficient in +those utilities supporting it. + +@noindent +The utilities provided are zcat, zcmp, zdiff, zgrep, ztest, and zupdate.@* +The formats supported are bzip2, gzip, lzip, and xz.@* +Zutils uses external compressors. The compressor to be used for each format +is configurable at runtime. + +zcat, zcmp, zdiff, and zgrep are improved replacements for the shell scripts +provided by GNU gzip. ztest is unique to zutils. zupdate is similar to +gzip's znew. + +NOTE: Bzip2 and lzip provide well-defined values of exit status, which makes +them safe to use with zutils. Gzip and xz may return ambiguous warning +values, making them less reliable back ends for zutils. +@xref{compressor-requirements}. 
+ +FORMAT NOTE 1: The option @samp{--format} allows the processing of a subset +of formats in recursive mode and when trying compressed file names: +@w{@samp{zgrep foo -r --format=bz2,lz somedir somefile.tar}}. + +FORMAT NOTE 2: If the option @samp{--force-format} is given, the files are +passed to the corresponding decompressor without verifying their format, +allowing for example the processing of compress'd (.Z) files with gzip: +@w{@samp{zcmp --force-format=gz file.Z file.lz}}. + +LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have +been compressed. Decompressed is used to refer to data which have undergone +the process of decompression. + +@sp 1 +Numbers given as arguments to options (positions, sizes) may be followed +by a multiplier and an optional @samp{B} for "byte". + +Table of SI and binary prefixes (unit multipliers): + +@multitable {Prefix} {kilobyte (10^3 = 1000)} {|} {Prefix} {kibibyte (2^10 = 1024)} +@item Prefix @tab Value @tab | @tab Prefix @tab Value +@item k @tab kilobyte (10^3 = 1000) @tab | @tab Ki @tab kibibyte (2^10 = 1024) +@item M @tab megabyte (10^6) @tab | @tab Mi @tab mebibyte (2^20) +@item G @tab gigabyte (10^9) @tab | @tab Gi @tab gibibyte (2^30) +@item T @tab terabyte (10^12) @tab | @tab Ti @tab tebibyte (2^40) +@item P @tab petabyte (10^15) @tab | @tab Pi @tab pebibyte (2^50) +@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60) +@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70) +@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) +@end multitable + + +@node Common options +@chapter Common options +@cindex common options + +The following +@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}: +are available in all the utilities. Rather than writing identical +descriptions for each of the programs, they are described here. +@ifnothtml +@xref{Argument syntax,,,arg_parser}. 
+@end ifnothtml + +@table @code +@item -h +@itemx --help +Print an informative help message describing the options and exit. zgrep +only supports the @samp{--help} form of this option. + +@item -V +@itemx --version +Print the version number on the standard output and exit. +This version number should be included in all bug reports. + +@item -M @var{format_list} +@itemx --format=@var{format_list} +Process only the formats listed in the comma-separated +@var{format_list}. Valid formats are @samp{bz2}, @samp{gz}, @samp{lz}, +@samp{xz}, and @samp{un} for @samp{uncompressed}, meaning "any file name +without a known extension". This option excludes files based on +extension, instead of format, because it is more efficient. The +exclusion only applies to names generated automatically (for example +when adding extensions to a file name or when operating recursively on +directories). Files given in the command line are always processed. + +Each format in @var{format_list} enables file names with the following +extensions: + +@multitable {bz2} {enables} {any other file name} +@item bz2 @tab enables @tab .bz2 .tbz .tbz2 +@item gz @tab enables @tab .gz .tgz +@item lz @tab enables @tab .lz .tlz +@item xz @tab enables @tab .xz .txz +@item un @tab enables @tab any other file name +@end multitable + +@item -N +@itemx --no-rcfile +Don't read the runtime configuration file @samp{zutilsrc}. + +@item --bz2=@var{command} +@itemx --gz=@var{command} +@itemx --lz=@var{command} +@itemx --xz=@var{command} +Set program to be used as (de)compressor for the corresponding format. +@var{command} may include arguments. For example +@w{@samp{--lz='plzip --threads=2'}}. The program set with @samp{--lz} is +used for both compression and decompression. The other three are used only +for decompression. The name of the program can't begin with @samp{-}. These +options override the values set in @file{zutilsrc}. 
The compression program +used must meet three requirements: + +@anchor{compressor-requirements} +@enumerate +@item +When called with the option @samp{-d}, it must read compressed data from +the standard input and produce decompressed data on the standard output. +@item +If the option @samp{-q} is passed to zutils, the compression program must +also accept it. +@item +It must return 0 if no errors occurred, and a non-zero value otherwise. +@end enumerate + +@end table + + +@node The zutilsrc file +@chapter The zutils configuration file 'zutilsrc' +@cindex zutilsrc + +@file{zutilsrc} is the runtime configuration file for zutils. In it you +may define the compressor name and options to be used for each format. +@file{zutilsrc} is optional; you don't need to install it in order to run +zutils. + +The compressors specified in the command line override those specified +in @file{zutilsrc}. + +You may copy the system @file{zutilsrc} file @file{$@{sysconfdir@}/zutilsrc} +to @file{$HOME/.zutilsrc} and customize these options as you like. The file +syntax is fairly obvious (and there are further instructions in it): + +@enumerate +@item +Any line beginning with @samp{#} is a comment line. +@item +Each non-comment line defines the command to be used for the corresponding +format, with the syntax: +@example +<format> = <compressor> [options] +@end example +where <format> is one of @samp{bz2}, @samp{gz}, @samp{lz}, or @samp{xz}. +@end enumerate + + +@node Zcat +@chapter Zcat +@cindex zcat + +zcat copies each @var{file} argument to standard output in sequence. If any +file given is compressed, its decompressed content is copied. If a file +given does not exist, and its name does not end with one of the known +extensions, zcat tries the compressed file names corresponding to the +formats supported. If a file fails to decompress, zcat continues copying the +rest of the files. 
+ +If a file is specified as @samp{-}, data are read from standard input, +decompressed if needed, and sent to standard output. Data read from +standard input must be of the same type; all uncompressed or all in the +same compressed format. + +If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + +The format for running zcat is: + +@example +zcat [@var{options}] [@var{files}] +@end example + +@noindent +Exit status is 0 if no errors occurred, 1 otherwise. + +zcat supports the following options: + +@table @code +@item -A +@itemx --show-all +Equivalent to @samp{-vET}. + +@item -b +@itemx --number-nonblank +Number all nonblank output lines, starting with 1. The line count is +unlimited. + +@item -e +Equivalent to @samp{-vE}. + +@item -E +@itemx --show-ends +Print a @samp{$} after the end of each line. + +@item -n +@itemx --number +Number all output lines, starting with 1. The line count is unlimited. + +@item -O @var{format} +@itemx --force-format=@var{format} +Force the compressed format given. Valid values for @var{format} are +@samp{bz2}, @samp{gz}, @samp{lz}, and @samp{xz}. If this option is used, +the files are passed to the corresponding decompressor without verifying +their format, and the exact file name must be given. Other names won't +be tried. + +@item -q +@itemx --quiet +Quiet operation. Suppress all messages. + +@item -r +@itemx --recursive +For each directory operand, read and process all files in that directory, +recursively. Follow symbolic links given in the command line, but skip +symbolic links that are encountered recursively. + +@item -R +@itemx --dereference-recursive +For each directory operand, read and process all files in that directory, +recursively, following all symbolic links. + +@item -s +@itemx --squeeze-blank +Replace multiple adjacent blank lines with a single blank line. + +@item -t +Equivalent to @samp{-vT}. 
+ +@item -T +@itemx --show-tabs +Print TAB characters as @samp{^I}. + +@item -v +@itemx --show-nonprinting +Print control characters except for LF (newline) and TAB using @samp{^} +notation and precede characters larger than 127 with @samp{M-} (which +stands for "meta"). + +@item --verbose +Verbose mode. Show error messages. + +@end table + + +@node Zcmp +@chapter Zcmp +@cindex zcmp + +zcmp compares two files and, if they differ, writes to standard output the +first byte and line number where they differ. Bytes and lines are numbered +starting with 1. A hyphen @samp{-} used as a @var{file} argument means +standard input. If any file given is compressed, its decompressed content is +used. Compressed files are decompressed on the fly; no temporary files are +created. + +The format for running zcmp is: + +@example +zcmp [@var{options}] @var{file1} [@var{file2}] +@end example + +@noindent +This compares @var{file1} to @var{file2}. The standard input is used only if +@var{file1} or @var{file2} refers to standard input. If @var{file2} is +omitted zcmp tries the following: + +@itemize - +@item +If @var{file1} is compressed, compares its decompressed contents with +the corresponding uncompressed file (the name of @var{file1} with the +extension removed). +@item +If @var{file1} is uncompressed, compares it with the decompressed +contents of @var{file1}.[lz|bz2|gz|xz] (the first one that is found). +@end itemize + +@noindent +An exit status of 0 means no differences were found, 1 means some +differences were found, and 2 means trouble. + +zcmp supports the following options: + +@table @code +@item -b +@itemx --print-bytes +Print the differing bytes. Print control bytes as a @samp{^} followed by +a letter, and precede bytes larger than 127 with @samp{M-} (which stands +for "meta"). + +@item -i @var{size} +@itemx --ignore-initial=@var{size} +Ignore any differences in the first @var{size} bytes of the input files. 
+Treat files with fewer than @var{size} bytes as if they were empty. If +@var{size} is in the form @samp{@var{size1}:@var{size2}}, ignore the +first @var{size1} bytes of the first input file and the first +@var{size2} bytes of the second input file. + +@item -l +@itemx -v +@itemx --list +@itemx --verbose +Print the byte numbers (in decimal) and values (in octal) of all +differing bytes. + +@item -n @var{count} +@itemx --bytes=@var{count} +Compare at most @var{count} input bytes. + +@item -O [@var{format1}][,@var{format2}] +@itemx --force-format=[@var{format1}][,@var{format2}] +Force the compressed formats given. Any of @var{format1} or +@var{format2} may be omitted and the corresponding format will be +automatically detected. Valid values for @var{format} are @samp{bz2}, +@samp{gz}, @samp{lz}, and @samp{xz}. If at least one format is specified +with this option, the file is passed to the corresponding decompressor +without verifying its format, and the exact file names of both +@var{file1} and @var{file2} must be given. Other names won't be tried. + +@item -q +@itemx -s +@itemx --quiet +@itemx --silent +Don't print anything; only return an exit status indicating whether the +files differ. + +@end table + + +@node Zdiff +@chapter Zdiff +@cindex zdiff + +zdiff compares two files and, if they differ, writes to standard output the +differences line by line. A hyphen @samp{-} used as a @var{file} argument +means standard input. If any file given is compressed, its decompressed +content is used. zdiff is a front end to the program diff and has the +limitation that messages from diff refer to temporary file names instead of +those specified. + +The format for running zdiff is: + +@example +zdiff [@var{options}] @var{file1} [@var{file2}] +@end example + +@noindent +This compares @var{file1} to @var{file2}. The standard input is used only if +@var{file1} or @var{file2} refers to standard input. 
If @var{file2} is +omitted zdiff tries the following: + +@itemize - +@item +If @var{file1} is compressed, compares its decompressed contents with +the corresponding uncompressed file (the name of @var{file1} with the +extension removed). +@item +If @var{file1} is uncompressed, compares it with the decompressed +contents of @var{file1}.[lz|bz2|gz|xz] (the first one that is found). +@end itemize + +@noindent +An exit status of 0 means no differences were found, 1 means some +differences were found, and 2 means trouble. + +zdiff supports the following options (some options only work if the diff +program used supports them): + +@table @code +@item -a +@itemx --text +Treat all files as text. + +@item -b +@itemx --ignore-space-change +Ignore changes in the amount of white space. + +@item -B +@itemx --ignore-blank-lines +Ignore changes whose lines are all blank. + +@itemx -c +Use the context output format. + +@item -C @var{n} +@itemx --context=@var{n} +Same as -c but use @var{n} lines of context. + +@item -d +@itemx --minimal +Try hard to find a smaller set of changes. + +@item -E +@itemx --ignore-tab-expansion +Ignore changes due to tab expansion. + +@item -i +@itemx --ignore-case +Ignore case differences in file contents. + +@item -O [@var{format1}][,@var{format2}] +@itemx --force-format=[@var{format1}][,@var{format2}] +Force the compressed formats given. Any of @var{format1} or +@var{format2} may be omitted and the corresponding format will be +automatically detected. Valid values for @var{format} are @samp{bz2}, +@samp{gz}, @samp{lz}, and @samp{xz}. If at least one format is specified +with this option, the file is passed to the corresponding decompressor +without verifying its format, and the exact file names of both +@var{file1} and @var{file2} must be given. Other names won't be tried. + +@item -p +@itemx --show-c-function +Show which C function each change is in. + +@item -q +@itemx --brief +Output only whether files differ. 
+ +@item -s +@itemx --report-identical-files +Report when two files are identical. + +@item -t +@itemx --expand-tabs +Expand tabs to spaces in output. + +@item -T +@itemx --initial-tab +Make tabs line up by prepending a tab. + +@item -u +Use the unified output format. + +@item -U @var{n} +@itemx --unified=@var{n} +Same as -u but use @var{n} lines of context. + +@item -w +@itemx --ignore-all-space +Ignore all white space. + +@end table + + +@node Zgrep +@chapter Zgrep +@cindex zgrep + +zgrep is a front end to the program grep that allows transparent search +on any combination of compressed and uncompressed files. If any file +given is compressed, its decompressed content is used. If a file given +does not exist, and its name does not end with one of the known +extensions, zgrep tries the compressed file names corresponding to the +formats supported. If a file fails to decompress, zgrep continues +searching the rest of the files. + +If a file is specified as @samp{-}, data are read from standard input, +decompressed if needed, and fed to grep. Data read from standard input +must be of the same type; all uncompressed or all in the same +compressed format. + +If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + +The format for running zgrep is: + +@example +zgrep [@var{options}] @var{pattern} [@var{files}] +@end example + +@noindent +An exit status of 0 means at least one match was found, 1 means no +matches were found, and 2 means trouble. + +zgrep supports the following options (some options only work if the grep +program used supports them): + +@table @code +@item -a +@itemx --text +Treat all files as text. + +@item -A @var{n} +@itemx --after-context=@var{n} +Print @var{n} lines of trailing context. + +@item -b +@itemx --byte-offset +Print the byte offset of each line. + +@item -B @var{n} +@itemx --before-context=@var{n} +Print @var{n} lines of leading context. 
+ +@item -c +@itemx --count +Only print a count of matching lines per file. + +@item -C @var{n} +@itemx --context=@var{n} +Print @var{n} lines of output context. + +@item --color[=@var{when}] +Show matched strings in color. @var{when} is @samp{never}, @samp{always}, +or @samp{auto}. + +@item -e @var{pattern} +@itemx --regexp=@var{pattern} +Use @var{pattern} as the pattern to match. + +@item -E +@itemx --extended-regexp +Treat @var{pattern} as an extended regular expression. + +@item -f @var{file} +@itemx --file=@var{file} +Obtain patterns from @var{file}, one per line.@* +When searching in several files at once, command substitution can be +used with @samp{-e} to read @var{file} only once, for example if +@var{file} is not a regular file: +@w{@samp{zgrep -e "$(cat @var{file})" file1.lz file2.gz}} + +@item -F +@itemx --fixed-strings +Treat @var{pattern} as a set of newline-separated strings. + +@item -h +@itemx --no-filename +Suppress the prefixing of file names on output when multiple files are +searched. + +@item -H +@itemx --with-filename +Print the file name for each match. + +@item -i +@itemx --ignore-case +Ignore case distinctions. + +@item -I +Ignore binary files. + +@item -l +@itemx --files-with-matches +Only print names of files containing at least one match. + +@item -L +@itemx --files-without-match +Only print names of files not containing any matches.@* +Note: option -L fails (prints wrong results, returns wrong status, and even +hangs) when using GNU grep versions 3.2 to 3.4 inclusive because of a wrong +change in the exit status of grep, which was reverted in GNU grep 3.5. + +@item -m @var{n} +@itemx --max-count=@var{n} +Stop after @var{n} matches. + +@item -n +@itemx --line-number +Prefix each matched line with its line number in the input file. + +@item -o +@itemx --only-matching +Show only the part of matching lines that actually matches @var{pattern}. + +@item -O @var{format} +@itemx --force-format=@var{format} +Force the compressed format given. 
Valid values for @var{format} are +@samp{bz2}, @samp{gz}, @samp{lz}, and @samp{xz}. If this option is used, +the files are passed to the corresponding decompressor without verifying +their format, and the exact file name must be given. Other names won't +be tried. + +@item -q +@itemx --quiet +Suppress all messages. Exit immediately with zero status if any match is +found, even if an error was detected. + +@item -r +@itemx --recursive +For each directory operand, read and process all files in that directory, +recursively. Follow symbolic links given in the command line, but skip +symbolic links that are encountered recursively. + +@item -R +@itemx --dereference-recursive +For each directory operand, read and process all files in that directory, +recursively, following all symbolic links. + +@item -s +@itemx --no-messages +Suppress error messages about nonexistent or unreadable files. + +@item -v +@itemx --invert-match +Select non-matching lines. + +@item --verbose +Verbose mode. Show error messages. + +@item -w +@itemx --word-regexp +Match only whole words. + +@item -x +@itemx --line-regexp +Match only whole lines. + +@end table + + +@node Ztest +@chapter Ztest +@cindex ztest + +ztest verifies the integrity of the compressed files specified. +Uncompressed files are ignored. If a file is specified as @samp{-}, the +integrity of compressed data read from standard input is verified. Data +read from standard input must be all in the same compressed format. If +a file fails to decompress, does not exist, can't be opened, or is a +terminal, ztest continues verifying the rest of the files. A final +diagnostic is shown at verbosity level 1 or higher if any file fails the +test when testing multiple files. + +If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + +Note that error detection in the xz format is broken. First, some xz +files lack integrity information. 
Second, not all xz decompressors can +@uref{http://www.nongnu.org/lzip/xz_inadequate.html#fragmented,,verify the integrity} +of all xz files. Third, section 2.1.1.2 'Stream Flags' of the +@uref{http://tukaani.org/xz/xz-file-format.txt,,xz format specification} +allows xz decompressors to produce garbage output without issuing any +warning. Therefore, xz files can't always be verified as reliably as +files in the other formats can. +@c We can only hope that xz is soon abandoned. + +The format for running ztest is: + +@example +ztest [@var{options}] [@var{files}] +@end example + +@noindent +The exit status is 0 if all compressed files verify OK, 1 if +environmental problems (file not found, invalid flags, I/O errors, etc.) +occurred, or 2 if any compressed file is corrupt or invalid. + +ztest supports the following options: + +@table @code +@item -O @var{format} +@itemx --force-format=@var{format} +Force the compressed format given. Valid values for @var{format} are +@samp{bz2}, @samp{gz}, @samp{lz}, and @samp{xz}. If this option is used, the +files are passed to the corresponding decompressor without verifying their +format, and any files in a format that the decompressor can't understand +will fail. For example, @samp{--force-format=gz} can test gzipped (.gz) and +compress'd (.Z) files if the compressor used is GNU gzip. + +@item -q +@itemx --quiet +Quiet operation. Suppress all messages. + +@item -r +@itemx --recursive +For each directory operand, read and process all files in that directory, +recursively. Follow symbolic links given in the command line, but skip +symbolic links that are encountered recursively. + +@item -R +@itemx --dereference-recursive +For each directory operand, read and process all files in that directory, +recursively, following all symbolic links. + +@item -v +@itemx --verbose +Verbose mode. Show the verify status for each file processed.@* +Further -v's increase the verbosity level. 
+ +@end table + + +@node Zupdate +@chapter Zupdate +@cindex zupdate + +zupdate recompresses files from bzip2, gzip, and xz formats to lzip format. +Each original is compared with the new file and then deleted. Only regular +files with standard file name extensions are recompressed, other files are +ignored. Compressed files are decompressed and then recompressed on the fly; +no temporary files are created. If an error happens while recompressing a +file, zupdate exits immediately without recompressing the rest of the files. +The lzip format is chosen as destination because it is the most appropriate +for long-term data archiving. + +If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches do nothing. + +If the lzip compressed version of a file already exists, the file is +skipped unless the option @samp{--force} is given. In this case, if the +comparison with the existing lzip version fails, an error is returned +and the original file is not deleted. The operation of zupdate is meant +to be safe and not cause any data loss. Therefore, existing lzip +compressed files are never overwritten nor deleted. + +Combining the options @samp{--force} and @samp{--keep}, as in +@w{@samp{zupdate -f -k *.gz}}, verifies that there are no differences +between each pair of files in a multiformat set of files. + +The names of the original files must have one of the following extensions:@* +@samp{.bz2}, @samp{.gz}, or @samp{.xz}, which are recompressed to +@samp{.lz};@* +@samp{.tbz}, @samp{.tbz2}, @samp{.tgz}, or @samp{.txz}, which are +recompressed to @samp{.tlz}.@* +Keeping the combined extensions (@samp{.tgz} --> @samp{.tlz}) may be useful +when recompressing Slackware packages, for example. + +Recompressing a file is much like copying or moving it; therefore zupdate +preserves the access and modification dates, permissions, and, when +possible, ownership of the file just as @samp{cp -p} does. 
(If the user ID or +the group ID can't be duplicated, the file permission bits S_ISUID and +S_ISGID are cleared.) + +The format for running zupdate is: + +@example +zupdate [@var{options}] [@var{files}] +@end example + +@noindent +The exit status is 0 if all the compressed files were successfully recompressed +(if needed), compared, and deleted (if requested). It is non-zero otherwise. + +zupdate supports the following options: + +@table @code +@item -f +@itemx --force +Don't skip a file for which a lzip compressed version already exists. +@samp{--force} compares the content of the input file with the content +of the existing lzip file and deletes the input file if both contents +are identical. + +@item -k +@itemx --keep +Keep (don't delete) the input file after comparing it with the lzip file. + +@item -l +@itemx --lzip-verbose +Pass one option @samp{-v} to the lzip compressor so that it shows the +compression ratio for each file processed. Using lzip 1.15 or newer, a +second @samp{-l} shows the progress of compression. Use it together with +@samp{-v} to see the name of the file. + +@item -q +@itemx --quiet +Quiet operation. Suppress all messages. + +@item -r +@itemx --recursive +For each directory operand, read and process all files in that directory, +recursively. Follow symbolic links given in the command line, but skip +symbolic links that are encountered recursively. + +@item -R +@itemx --dereference-recursive +For each directory operand, read and process all files in that directory, +recursively, following all symbolic links. + +@item -v +@itemx --verbose +Verbose mode. Show the files being processed. A second @samp{-v} also +shows the files being ignored. + +@item -0 .. -9 +Set the compression level of lzip. By default zupdate passes @samp{-9} to +lzip. Custom compression options can be passed to lzip with the option +@samp{--lz}. For example, @w{@samp{--lz='lzip -9 -s64MiB'}}. 
+ +@end table + + +@node Problems +@chapter Reporting bugs +@cindex bugs +@cindex getting help + +There are probably bugs in zutils. There are certainly errors and +omissions in this manual. If you report them, they will get fixed. If +you don't, no one will ever know about them and they will remain unfixed +for all eternity, if not longer. + +If you find a bug in zutils, please send electronic mail to +@email{zutils-bug@@nongnu.org}. Include the version number, which you can +find by running @w{@samp{zupdate --version}}. + + +@node Concept index +@unnumbered Concept index + +@printindex cp + +@bye @@ -0,0 +1,411 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <unistd.h> +#include <sys/wait.h> + +#include "arg_parser.h" +#include "rc.h" + + +const char * invocation_name = 0; +const char * program_name = 0; +int verbosity = 0; + +namespace { + +const char * const config_file_name = "zutilsrc"; +const char * const program_year = "2021"; + +std::string compressor_names[num_formats] = + { "bzip2", "gzip", "lzip", "xz" }; // default compressor names + +// args to compressors read from rc or from options --[bglx]z, maybe empty +std::vector< std::string > compressor_args[num_formats]; + +// vector of enabled formats plus [num_formats] for uncompressed. +// empty means all enabled. +std::vector< bool > enabled_formats; + +const struct { const char * from; const char * to; int format_index; } + known_extensions[] = { + { ".bz2", "", fmt_bz2 }, + { ".tbz", ".tar", fmt_bz2 }, + { ".tbz2", ".tar", fmt_bz2 }, + { ".gz", "", fmt_gz }, + { ".tgz", ".tar", fmt_gz }, + { ".lz", "", fmt_lz }, + { ".tlz", ".tar", fmt_lz }, + { ".xz", "", fmt_xz }, + { ".txz", ".tar", fmt_xz }, + { 0, 0, -1 } }; + + +int my_fgetc( FILE * const f ) + { + int ch; + bool comment = false; + + do { + ch = std::fgetc( f ); + if( ch == '#' ) comment = true; + else if( ch == '\n' || ch == EOF ) comment = false; + else if( ch == '\\' && comment ) + { + const int c = std::fgetc( f ); + if( c == '\n' ) { std::ungetc( c, f ); comment = false; } + } + } + while( comment ); + return ch; + } + + +// Returns the parity of escapes (backslashes) at the end of a string. +bool trailing_escape( const std::string & s ) + { + unsigned len = s.size(); + bool odd_escape = false; + while( len > 0 && s[--len] == '\\' ) odd_escape = !odd_escape; + return odd_escape; + } + + +/* Read a line discarding comments, leading whitespace, and blank lines. + Escaped newlines are discarded. + Returns the empty string if at EOF. 
+*/ +const std::string & my_fgets( FILE * const f, int & linenum ) + { + static std::string s; + bool strip = true; // strip leading whitespace + s.clear(); + + while( true ) + { + int ch = my_fgetc( f ); + if( strip ) + { + strip = false; + while( std::isspace( ch ) ) + { if( ch == '\n' ) { ++linenum; } ch = my_fgetc( f ); } + } + if( ch == EOF ) { if( s.size() ) { ++linenum; } break; } + else if( ch == '\n' ) + { + ++linenum; strip = true; + if( trailing_escape( s ) ) s.erase( s.size() - 1 ); + else if( s.size() ) break; + } + else s += ch; + } + return s; + } + + +bool parse_compressor_command( const std::string & s, int i, + const int format_index ) + { + const int len = s.size(); + while( i < len && std::isspace( s[i] ) ) ++i; // strip spaces + int l = i; + while( i < len && !std::isspace( s[i] ) ) ++i; + if( l >= i || s[l] == '-' ) return false; + compressor_names[format_index].assign( s, l, i - l ); + + compressor_args[format_index].clear(); + while( i < len ) + { + while( i < len && std::isspace( s[i] ) ) ++i; // strip spaces + l = i; + while( i < len && !std::isspace( s[i] ) ) ++i; + if( l < i ) + compressor_args[format_index].push_back( std::string( s, l, i - l ) ); + } + return true; + } + + +bool parse_rc_line( const std::string & line, + const char * const filename, const int linenum ) + { + const int len = line.size(); + int i = 0; + while( i < len && std::isspace( line[i] ) ) ++i; // strip spaces + int l = i; + while( i < len && line[i] != '=' && !std::isspace( line[i] ) ) ++i; + if( l >= i ) + { if( verbosity >= 0 ) + std::fprintf( stderr, "%s %d: missing format name.\n", filename, linenum ); + return false; } + const std::string name( line, l, i - l ); + int format_index = -1; + for( int j = 0; j < num_formats; ++j ) + if( name == format_names[j] ) { format_index = j; break; } + if( format_index < 0 ) + { if( verbosity >= 0 ) + std::fprintf( stderr, "%s %d: bad format name '%s'\n", + filename, linenum, name.c_str() ); + return false; } + + while( i 
< len && std::isspace( line[i] ) ) ++i; // strip spaces + if( i <= 0 || i >= len || line[i] != '=' ) + { if( verbosity >= 0 ) + std::fprintf( stderr, "%s %d: missing '='\n", filename, linenum ); + return false; } + ++i; // skip the '=' + if( !parse_compressor_command( line, i, format_index ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s %d: missing compressor name.\n", filename, linenum ); + return false; + } + return true; + } + + + // Returns 0 for success, 1 for file not found, 2 for syntax error. +int process_rcfile( const std::string & name ) + { + FILE * const f = std::fopen( name.c_str(), "r" ); + if( !f ) return 1; + + int linenum = 0; + int retval = 0; + + while( true ) + { + const std::string & line = my_fgets( f, linenum ); + if( line.empty() ) break; // EOF + if( !parse_rc_line( line, name.c_str(), linenum ) ) + { retval = 2; break; } + } + std::fclose( f ); + return retval; + } + +} // end namespace + + +bool enabled_format( const int format_index ) + { + if( enabled_formats.size() <= num_formats ) return true; // all enabled + if( format_index < 0 ) return enabled_formats[num_formats]; // uncompressed + return enabled_formats[format_index]; + } + + +void parse_format_list( const std::string & arg ) + { + const std::string un( "uncompressed" ); + bool error = arg.empty(); + enabled_formats.assign( num_formats + 1, false ); + + for( unsigned l = 0, r; l < arg.size(); l = r + 1 ) + { + r = std::min( arg.find( ',', l ), arg.size() ); + if( l >= r ) { error = true; break; } // empty format + int format_index = num_formats; + const std::string s( arg, l, r - l ); + for( int i = 0; i < num_formats; ++i ) + if( s == format_names[i] ) + { format_index = i; break; } + if( format_index == num_formats && un.find( s ) != 0 ) + { error = true; break; } + enabled_formats[format_index] = true; + } + if( error ) + { show_error( "Bad argument for option '--format'." 
); std::exit( 1 ); } + } + + +int parse_format_type( const std::string & arg ) + { + for( int i = 0; i < num_formats; ++i ) + if( arg == format_names[i] ) + return i; + show_error( "Bad argument for option '--force-format'." ); + std::exit( 1 ); + } + + +int extension_index( const std::string & name ) + { + for( int eindex = 0; known_extensions[eindex].from; ++eindex ) + { + const std::string ext( known_extensions[eindex].from ); + if( name.size() > ext.size() && + name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 ) + return eindex; + } + return -1; + } + +int extension_format( const int eindex ) + { return ( eindex >= 0 ) ? known_extensions[eindex].format_index : -1; } + +const char * extension_from( const int eindex ) + { return known_extensions[eindex].from; } + +const char * extension_to( const int eindex ) + { return known_extensions[eindex].to; } + + +void maybe_process_config_file( const Arg_parser & parser ) + { + for( int i = 0; i < parser.arguments(); ++i ) + if( parser.code( i ) == 'N' ) return; + std::string name; + const char * p = std::getenv( "HOME" ); if( p ) name = p; + if( name.size() ) + { + name += "/."; name += config_file_name; + const int retval = process_rcfile( name ); + if( retval == 0 ) return; + if( retval == 2 ) std::exit( 2 ); + } + name = SYSCONFDIR; name += '/'; name += config_file_name; + const int retval = process_rcfile( name ); + if( retval == 2 ) std::exit( 2 ); + } + + +void parse_compressor( const std::string & arg, const int format_index, + const int eretval ) + { + if( !parse_compressor_command( arg, 0, format_index ) ) + { show_error( "Missing compressor name." 
); std::exit( eretval ); } + } + + +const char * get_compressor_name( const int format_index ) + { + if( format_index >= 0 && format_index < num_formats && + compressor_names[format_index].size() ) + return compressor_names[format_index].c_str(); + return 0; + } + + +const std::vector< std::string > & get_compressor_args( const int format_index ) + { + return compressor_args[format_index]; + } + + +void show_help_addr() + { + std::printf( "\nReport bugs to zutils-bug@nongnu.org\n" + "Zutils home page: http://www.nongnu.org/zutils/zutils.html\n" ); + } + + +void show_version() + { + std::printf( "%s (zutils) %s\n", program_name, PROGVERSION ); + std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" ); + } + + +void show_error( const char * const msg, const int errcode, const bool help ) + { + if( verbosity < 0 ) return; + if( msg && msg[0] ) + std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? std::strerror( errcode ) : "" ); + if( help ) + std::fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); + } + + +void show_file_error( const char * const filename, const char * const msg, + const int errcode ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? 
std::strerror( errcode ) : "" ); + } + + +void internal_error( const char * const msg ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); + std::exit( 3 ); + } + + +void show_close_error( const char * const prog_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Error closing output of %s: %s\n", + program_name, prog_name, std::strerror( errno ) ); + } + + +void show_exec_error( const char * const prog_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't exec '%s': %s\n", + program_name, prog_name, std::strerror( errno ) ); + } + + +void show_fork_error( const char * const prog_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't fork '%s': %s\n", + program_name, prog_name, std::strerror( errno ) ); + } + + +int wait_for_child( const pid_t pid, const char * const name, + const int eretval, const bool isgzxz ) + { + int status; + while( waitpid( pid, &status, 0 ) == -1 ) + { + if( errno != EINTR ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Error waiting termination of '%s': %s\n", + program_name, name, std::strerror( errno ) ); + _exit( eretval ); + } + } + if( WIFEXITED( status ) ) + { + const int tmp = WEXITSTATUS( status ); + if( isgzxz && eretval == 1 && tmp == 1 ) return 2; // for ztest + return tmp; + } + return eretval; + } @@ -0,0 +1,62 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +enum { fmt_bz2, fmt_gz, fmt_lz, fmt_xz, num_formats }; // format_index +const char * const format_names[num_formats] = { "bz2", "gz", "lz", "xz" }; +const char * const simple_extensions[num_formats] = + { ".bz2", ".gz", ".lz", ".xz" }; +const int format_order[num_formats] = + { fmt_lz, fmt_bz2, fmt_gz, fmt_xz }; // search order + +bool enabled_format( const int format_index ); +void parse_format_list( const std::string & arg ); +int parse_format_type( const std::string & arg ); + +int extension_index( const std::string & name ); // -1 if unknown +int extension_format( const int eindex ); // -1 if uncompressed +const char * extension_from( const int eindex ); +const char * extension_to( const int eindex ); + +extern const char * invocation_name; +extern const char * program_name; +extern int verbosity; + +class Arg_parser; + +void maybe_process_config_file( const Arg_parser & parser ); + +void parse_compressor( const std::string & arg, const int format_index, + const int eretval = 2 ); + +const char * get_compressor_name( const int format_index ); +const std::vector< std::string > & get_compressor_args( const int format_index ); + +void show_help_addr(); +void show_version(); +void show_error( const char * const msg, const int errcode = 0, + const bool help = false ); +void show_file_error( const char * const filename, const char * const msg, + const int errcode = 0 ); +void internal_error( const char * const msg ); +void show_close_error( const char * const prog_name = "data feeder" ); +void show_exec_error( const char * const prog_name ); +void show_fork_error( const char * const prog_name ); + +// Returns exit status of child process 'pid', or 'eretval' in case of error. 
+// +int wait_for_child( const pid_t pid, const char * const name, + const int eretval = 2, const bool isgzxz = false ); diff --git a/recursive.cc b/recursive.cc new file mode 100644 index 0000000..a69e117 --- /dev/null +++ b/recursive.cc @@ -0,0 +1,109 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* Returns true if full_name is a regular file with an enabled extension + or (a link to) a directory. 
*/ +bool test_full_name( const std::string & full_name, const struct stat * stp, + const bool follow ) + { + struct stat st, st2; + if( follow && stat( full_name.c_str(), &st ) != 0 ) return false; + if( !follow && lstat( full_name.c_str(), &st ) != 0 ) return false; + if( S_ISREG( st.st_mode ) ) // regular file + return enabled_format( extension_format( extension_index( full_name ) ) ); + if( !S_ISDIR( st.st_mode ) ) return false; + + std::string prev_dir( full_name ); + bool loop = ( stp && st.st_ino == stp->st_ino && st.st_dev == stp->st_dev ); + if( !loop ) + for( unsigned i = prev_dir.size(); i > 1; ) + { + while( i > 0 && prev_dir[i-1] != '/' ) --i; + if( i == 0 ) break; + if( i > 1 ) --i; // remove trailing slash except at root dir + prev_dir.resize( i ); + if( stat( prev_dir.c_str(), &st2 ) != 0 || !S_ISDIR( st2.st_mode ) || + ( st.st_ino == st2.st_ino && st.st_dev == st2.st_dev ) ) + { loop = true; break; } + } + if( loop ) // full_name already visited or above tree + show_file_error( full_name.c_str(), "warning: Recursive directory loop." ); + return !loop; // (link to) directory + } + + +/* Returns in input_filename the next filename, or "." for stdin. + ("." was chosen because it is not a valid filename). + Sets 'error' to true if a directory fails to open. 
*/ +bool next_filename( std::list< std::string > & filenames, + std::string & input_filename, bool & error, + const int recursive, const bool ignore_stdin = false, + const bool no_messages = false ) + { + while( !filenames.empty() ) + { + input_filename = filenames.front(); + filenames.pop_front(); + if( input_filename == "-" ) + { + if( ignore_stdin ) continue; + input_filename = "."; return true; + } + struct stat st; + if( stat( input_filename.c_str(), &st ) == 0 && S_ISDIR( st.st_mode ) ) + { + if( recursive ) + { + DIR * const dirp = opendir( input_filename.c_str() ); + if( !dirp ) + { + if( !no_messages ) + show_file_error( input_filename.c_str(), "Can't open directory", errno ); + error = true; continue; + } + for( unsigned i = input_filename.size(); + i > 1 && input_filename[i-1] == '/'; --i ) + input_filename.resize( i - 1 ); // remove trailing slashes + struct stat stdot, *stdotp = 0; + if( input_filename[0] != '/' ) // relative path + { + if( input_filename == "." ) input_filename.clear(); + if( stat( ".", &stdot ) == 0 && S_ISDIR( stdot.st_mode ) ) + stdotp = &stdot; + } + if( input_filename.size() && input_filename != "/" ) + input_filename += '/'; + std::list< std::string > tmp_list; + while( true ) + { + const struct dirent * const entryp = readdir( dirp ); + if( !entryp ) { closedir( dirp ); break; } + const std::string tmp_name( entryp->d_name ); + if( tmp_name == "." || tmp_name == ".." ) continue; + const std::string full_name( input_filename + tmp_name ); + if( test_full_name( full_name, stdotp, recursive == 2 ) ) + tmp_list.push_back( full_name ); + } + filenames.splice( filenames.begin(), tmp_list ); + } + continue; + } + return true; + } + input_filename.clear(); + return false; + } diff --git a/testsuite/check.sh b/testsuite/check.sh new file mode 100755 index 0000000..6a6ef38 --- /dev/null +++ b/testsuite/check.sh @@ -0,0 +1,560 @@ +#! 
/bin/sh +# check script for Zutils - Utilities dealing with compressed files +# Copyright (C) 2009-2021 Antonio Diaz Diaz. +# +# This script is free software: you have unlimited permission +# to copy, distribute, and modify it. + +LC_ALL=C +export LC_ALL +objdir=`pwd` +testdir=`cd "$1" ; pwd` +ZCAT="${objdir}"/zcat +ZCMP="${objdir}"/zcmp +ZDIFF="${objdir}"/zdiff +ZGREP="${objdir}"/zgrep +ZEGREP="${objdir}"/zegrep +ZFGREP="${objdir}"/zfgrep +ZTEST="${objdir}"/ztest +ZUPDATE="${objdir}"/zupdate +compressors="bzip2 gzip lzip" +extensions="bz2 gz lz" +compressor_needed() { echo "${compressors} are needed to run tests" ; exit 1 ; } +framework_failure() { echo "failure in testing framework" ; exit 1 ; } + +if [ ! -f "${ZUPDATE}" ] || [ ! -x "${ZUPDATE}" ] ; then + echo "${ZUPDATE}: cannot execute" + exit 1 +fi + +[ -e "${ZUPDATE}" ] 2> /dev/null || + { + echo "$0: a POSIX shell is required to run the tests" + echo "Try bash -c \"$0 $1 $2\"" + exit 1 + } + +if [ -d tmp ] ; then rm -rf tmp ; fi +mkdir tmp +cd "${objdir}"/tmp || framework_failure + +for i in ${compressors}; do + cat "${testdir}"/test.txt > in || framework_failure + $i in || compressor_needed + printf "Hello World!\n" > hello || framework_failure + $i hello || compressor_needed +done + +cat "${testdir}"/test.txt > in || framework_failure +cat "${testdir}"/test.txt.tar > in.tar || framework_failure +printf "01234567890" > pin.tar || framework_failure +cat in.tar in.tar in.tar in.tar >> pin.tar || framework_failure +cat in > -in- || framework_failure +cat in.lz > -in-.lz || framework_failure +cat in.lz > lz_only.lz || framework_failure +cat in in in in in in > in6 || framework_failure +bad0_lz="${testdir}"/zero_bad_crc.lz +bad0_gz="${testdir}"/zero_bad_crc.gz +bad1_lz="${testdir}"/test_bad_crc.lz +touch empty empty.bz2 empty.gz empty.lz +fail=0 +test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } + +printf "testing zcat-%s..." 
"$2" + +for i in ${extensions}; do + "${ZCAT}" -N in.$i > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i + "${ZCAT}" -N empty.$i in.$i > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i + "${ZCAT}" -N --format=un in.$i > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i + "${ZCAT}" -N --force-format=$i in.$i > copy || test_failed $LINENO $i + cmp in copy || test_failed $LINENO $i + "${ZCAT}" -N in.$i | dd bs=1000 count=1 > copy 2> /dev/null || + test_failed $LINENO $i + dd if=in bs=1000 count=1 2> /dev/null | cmp - copy || + test_failed $LINENO $i +done + +printf "LZIP\001-.............................." | "${ZCAT}" -N > /dev/null 2>&1 +[ $? = 1 ] || test_failed $LINENO +printf "LZIPxxxxxx" | "${ZCAT}" -N > /dev/null || test_failed $LINENO +printf "BZh9xxxxxx" | "${ZCAT}" -N > /dev/null || test_failed $LINENO +"${ZCAT}" -N -v -s "${testdir}"/zcat_vs.dat > /dev/null || test_failed $LINENO +"${ZCAT}" -N < in > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N < in.gz > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N < in.bz2 > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N < in.lz > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N -O lz - - < in.lz > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N --lz='lzip -q' < in.lz > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N in > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +"${ZCAT}" -N lz_only > copy || test_failed $LINENO +cmp in copy || test_failed $LINENO +cat in.lz in in in in | "${ZCAT}" -N > copy || test_failed $LINENO # tdata +cmp in copy || test_failed $LINENO +"${ZCAT}" -N in in.gz in.bz2 in.lz -- -in- -in-.lz > copy || test_failed $LINENO +cmp in6 copy || test_failed $LINENO +"${ZCAT}" -Nq in in.gz in.bz2 in.lz "${bad0_lz}" -- -in- -in-.lz > copy +[ $? 
= 1 ] || test_failed $LINENO +cmp in6 copy || test_failed $LINENO +"${ZCAT}" -Nq "${bad1_lz}" -- -in-.lz in in.gz in.bz2 in.lz > copy +[ $? = 1 ] || test_failed $LINENO +cmp in6 copy || test_failed $LINENO +"${ZCAT}" -N . || test_failed $LINENO +"${ZCAT}" -N -r . > /dev/null || test_failed $LINENO +"${ZCAT}" -N -r > /dev/null || test_failed $LINENO +"${ZCAT}" -N -R . > /dev/null || test_failed $LINENO +"${ZCAT}" -N -R > /dev/null || test_failed $LINENO + +"${ZCAT}" -Nq "" < in.lz > /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=, in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=,lz in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=gz,,lz in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=lz,, in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=nc in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --lz='-lzip -q' in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -N --force-format=gz in.bz2 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -N --force-format=bz2 in.lz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --force-format=lz in.gz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -N --bad-option 2> /dev/null +[ $? = 1 ] || test_failed $LINENO + + +printf "\ntesting zcmp-%s..." "$2" + +for i in ${extensions}; do + "${ZCMP}" -N in.$i || test_failed $LINENO $i + "${ZCMP}" -N in in.$i || test_failed $LINENO $i + "${ZCMP}" -N in in.$i --force-format=,$i || test_failed $LINENO $i + "${ZCMP}" -N in.$i in || test_failed $LINENO $i + "${ZCMP}" -N in.$i in --force-format=$i || test_failed $LINENO $i + "${ZCMP}" -N -i 1kB:1000 -n 500 in6 in.$i || test_failed $LINENO $i + "${ZCMP}" -N -i 1KiB:1024 -n 50 in.$i in6 || test_failed $LINENO $i + "${ZCMP}" -N empty empty.$i || test_failed $LINENO $i +done + +"${ZCMP}" -Nq in in6 +[ $? 
= 1 ] || test_failed $LINENO +"${ZCMP}" -N -n 0 in in6 || test_failed $LINENO +"${ZCMP}" -N -n 100B in in6 || test_failed $LINENO +"${ZCMP}" -N -n 1k in in6 || test_failed $LINENO +"${ZCMP}" -N -n 10kB in in6 || test_failed $LINENO +"${ZCMP}" -Nq in.tar pin.tar +[ $? = 1 ] || test_failed $LINENO +"${ZCMP}" -Nq -i 0B:11B in.tar pin.tar +[ $? = 1 ] || test_failed $LINENO +"${ZCMP}" -N -i 0:11 -n 0 in.tar pin.tar || test_failed $LINENO +"${ZCMP}" -N -i 0:11 -n 100 in.tar pin.tar || test_failed $LINENO +"${ZCMP}" -N -i 0:11 -n 1Ki in.tar pin.tar || test_failed $LINENO +"${ZCMP}" -N -i 0:11 -n 10KiB in.tar pin.tar || test_failed $LINENO +"${ZCMP}" -N - - || test_failed $LINENO +"${ZCMP}" -Nq - +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N in in || test_failed $LINENO +"${ZCMP}" -N in || test_failed $LINENO +"${ZCMP}" -N --format=gz,bz2 in || test_failed $LINENO +"${ZCMP}" -N --format=gz in || test_failed $LINENO +"${ZCMP}" -N in.lz in.gz || test_failed $LINENO +"${ZCMP}" -N --lz='lzip -q' in.lz in.gz || test_failed $LINENO +"${ZCMP}" -N in.gz -- -in-.lz || test_failed $LINENO +"${ZCMP}" -N -- -in-.lz in.gz || test_failed $LINENO +"${ZCMP}" -N in -- -in-.lz || test_failed $LINENO +"${ZCMP}" -N -- -in- in.lz || test_failed $LINENO +"${ZCMP}" -N in.lz -- -in- || test_failed $LINENO +"${ZCMP}" -N -- -in-.lz in || test_failed $LINENO +"${ZCMP}" -N -- -in- in || test_failed $LINENO +"${ZCMP}" -N in -- -in- || test_failed $LINENO +"${ZCMP}" -N in.lz - < in || test_failed $LINENO +"${ZCMP}" -N - in.lz < in || test_failed $LINENO +"${ZCMP}" -N in - < in.lz || test_failed $LINENO +"${ZCMP}" -N - in < in.lz || test_failed $LINENO +"${ZCMP}" -N lz_only.lz - < in || test_failed $LINENO +"${ZCMP}" -Nq lz_only.lz +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq "" in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq --force-format=lz in.lz +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq --force-format=lz in.gz in.lz +r=$? 
+{ [ $r = 1 ] || [ $r = 2 ] ; } || test_failed $LINENO +"${ZCMP}" -Nq -i 100BB in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq -i 100BB:100 in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq -i 100: in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -Nq -n -1 in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N -q -n 100BB in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N --bad-option in in 2> /dev/null +[ $? = 2 ] || test_failed $LINENO + + +printf "\ntesting zdiff-%s..." "$2" + +for i in ${extensions}; do + "${ZDIFF}" -N in.$i > /dev/null || test_failed $LINENO $i + "${ZDIFF}" -N in in.$i > /dev/null || test_failed $LINENO $i + "${ZDIFF}" -N in in.$i --force-format=,$i > /dev/null || + test_failed $LINENO $i + "${ZDIFF}" -N in.$i in > /dev/null || test_failed $LINENO $i + "${ZDIFF}" -N in.$i in --force-format=$i, > /dev/null || + test_failed $LINENO $i + "${ZDIFF}" -N empty empty.$i > /dev/null || test_failed $LINENO $i +done + +"${ZDIFF}" -N in in6 > /dev/null +[ $? = 1 ] || test_failed $LINENO +# GNU diff 3.0 returns 2 when binary files differ +"${ZDIFF}" -N in.tar pin.tar > /dev/null && test_failed $LINENO +"${ZDIFF}" -N - - || test_failed $LINENO +"${ZDIFF}" -N - 2> /dev/null +[ $? 
= 2 ] || test_failed $LINENO +"${ZDIFF}" -N in in || test_failed $LINENO +"${ZDIFF}" -N in || test_failed $LINENO +"${ZDIFF}" -N --format=gz,bz2 in || test_failed $LINENO +"${ZDIFF}" -N --format=gz in || test_failed $LINENO +"${ZDIFF}" -N in.lz in.gz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N --lz='lzip -q' in.lz in.gz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in.gz -- -in-.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N -- -in-.lz in.gz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in -- -in-.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N -- -in- in.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in.lz -- -in- > /dev/null || test_failed $LINENO +"${ZDIFF}" -N -- -in-.lz in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N -- -in- in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in -- -in- > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in.lz - < in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N - in.lz < in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in - < in.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N - in < in.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N lz_only.lz - < in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N lz_only.lz 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N "" in 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --bz2='-bzip2' in.bz2 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -Nq --force-format=bz2 in.bz2 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N -q --force-format=,lz in.lz in.bz2 > /dev/null 2>&1 +r=$? +{ [ $r = 1 ] || [ $r = 2 ] ; } || test_failed $LINENO +"${ZDIFF}" -N --bad-option 2> /dev/null +[ $? = 2 ] || test_failed $LINENO + +mkdir tmp2 +cat in > tmp2/a || framework_failure +cat in.lz > tmp2/a.lz || framework_failure +"${ZDIFF}" -N --format=bz2 tmp2/a 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --format=gz tmp2/a 2> /dev/null +[ $? 
= 2 ] || test_failed $LINENO +"${ZDIFF}" -N --format=lz tmp2/a.lz 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --format=lz tmp2/a || test_failed $LINENO +"${ZDIFF}" -N --format=un tmp2/a.lz || test_failed $LINENO +rm -r tmp2 || framework_failure + + +printf "\ntesting zgrep-%s..." "$2" + +for i in ${extensions}; do + "${ZGREP}" -N "GNU" in.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N "GNU" in.$i hello.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N "GNU" hello.$i in.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N -q "GNU" in.$i hello.$i || test_failed $LINENO $i + "${ZGREP}" -N -q "GNU" hello.$i in.$i || test_failed $LINENO $i + "${ZGREP}" -N "GNU" < in.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N -l "GNU" in.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N -L "GNU" in.$i || test_failed $LINENO $i + "${ZGREP}" -N --force-format=$i "GNU" in.$i > /dev/null || + test_failed $LINENO $i + "${ZGREP}" -N -v "nx_pattern" in.$i > /dev/null || + test_failed $LINENO $i + "${ZGREP}" -N "nx_pattern" in.$i && test_failed $LINENO $i + "${ZGREP}" -N -l "nx_pattern" in.$i && test_failed $LINENO $i + "${ZGREP}" -N -L "nx_pattern" in.$i > /dev/null && + test_failed $LINENO $i + "${ZGREP}" -N --force-format=$i "GNU" in 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + "${ZGREP}" -N "nx_pattern" empty.$i && test_failed $LINENO $i +done + +"${ZGREP}" -N pin.tar -e "GNU" > /dev/null || test_failed $LINENO +"${ZGREP}" -N "GNU" < pin.tar > /dev/null || test_failed $LINENO +"${ZGREP}" -N -r "GNU" . > /dev/null || test_failed $LINENO +"${ZGREP}" -N -r "GNU" > /dev/null || test_failed $LINENO +"${ZGREP}" -N -R "GNU" . > /dev/null || test_failed $LINENO +"${ZGREP}" -N -R "GNU" > /dev/null || test_failed $LINENO +"${ZGREP}" -N "nx_pattern" -r . 
in > /dev/null && test_failed $LINENO +"${ZGREP}" -N -e "GNU" in > /dev/null || test_failed $LINENO +"${ZGREP}" -N "GNU" < in > /dev/null || test_failed $LINENO +"${ZGREP}" -N -O lz "nx_pattern" - - < in.lz > /dev/null && test_failed $LINENO +"${ZGREP}" -N -e "-free" --lz='lzip -q' < in.lz > /dev/null || + test_failed $LINENO +"${ZGREP}" -N -- "-free" -in- > /dev/null || test_failed $LINENO +"${ZGREP}" -N -q -- "-free" nx_file -in-.lz || test_failed $LINENO +"${ZGREP}" -N "GNU" in in.gz in.bz2 in.lz -- -in- > /dev/null || + test_failed $LINENO +"${ZGREP}" -N -l "GNU" in in.gz in.bz2 in.lz -- -in- > /dev/null || + test_failed $LINENO +"${ZGREP}" -N -L "GNU" in in.gz in.bz2 in.lz -- -in- || test_failed $LINENO +"${ZGREP}" -N -l "nx_pattern" in in.gz in.bz2 in.lz -- -in- && + test_failed $LINENO +"${ZGREP}" -N -L "nx_pattern" in in.gz in.bz2 in.lz -- -in- > /dev/null && + test_failed $LINENO +"${ZGREP}" -Nq -l "01234567890" in "${bad1_lz}" in.lz && test_failed $LINENO +"${ZGREP}" -Nq -l "01234567890" in "${bad1_lz}" in.lz pin.tar > /dev/null || + test_failed $LINENO + +"${ZGREP}" -N "GNU" . +[ $? = 1 ] || test_failed $LINENO +"${ZGREP}" -N "GNU" "" < in.lz 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZGREP}" -N --bad-option 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZGREP}" -N "GNU" -s nx_file +[ $? = 2 ] || test_failed $LINENO +"${ZGREP}" -N -q +[ $? = 2 ] || test_failed $LINENO + +"${ZEGREP}" -N "GNU" in > /dev/null || test_failed $LINENO +"${ZFGREP}" -N "GNU" in > /dev/null || test_failed $LINENO + + +printf "\ntesting ztest-%s..." "$2" + +for i in ${extensions}; do + "${ZTEST}" -N --force-format=$i < in.$i || test_failed $LINENO $i + "${ZTEST}" -N --force-format=$i < in 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + "${ZTEST}" -N --force-format=$i in 2> /dev/null + [ $? 
= 2 ] || test_failed $LINENO $i +done + +"${ZTEST}" -N in in.gz in.bz2 in.lz -- -in- || test_failed $LINENO +"${ZTEST}" -N < in.gz || test_failed $LINENO +"${ZTEST}" -N < in.bz2 || test_failed $LINENO +"${ZTEST}" -N < in.lz || test_failed $LINENO +"${ZTEST}" -N - < in.lz || test_failed $LINENO +"${ZTEST}" -N - in.gz - < in.lz || test_failed $LINENO +"${ZTEST}" -N --lz='lzip -q' < in.lz || test_failed $LINENO +"${ZTEST}" -N -r . || test_failed $LINENO +"${ZTEST}" -N -r || test_failed $LINENO +"${ZTEST}" -N -R . || test_failed $LINENO +"${ZTEST}" -N -R || test_failed $LINENO +"${ZTEST}" -N empty empty.bz2 empty.gz empty.lz || test_failed $LINENO + +"${ZTEST}" -Nq in.gz "${bad0_lz}" in.bz2 "${bad1_lz}" in.lz +[ $? = 2 ] || test_failed $LINENO +lines=`"${ZTEST}" -N in.gz "${bad0_lz}" in.bz2 "${bad1_lz}" in.lz 2>&1 | wc -l` +[ "${lines}" -eq 2 ] || test_failed $LINENO "${lines}" +lines=`"${ZTEST}" -Nv in.gz "${bad0_lz}" in.bz2 "${bad1_lz}" in.lz 2>&1 | wc -l` +[ "${lines}" -eq 6 ] || test_failed $LINENO "${lines}" +"${ZTEST}" -Nq < in +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq "" < in.lz +[ $? = 1 ] || test_failed $LINENO +dd if=in.lz bs=1000 count=1 2> /dev/null | "${ZTEST}" -N -q +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq --force-format=lz in.bz2 +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -N --lz='lzip --bad-option' in.lz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZTEST}" -N --bad-option 2> /dev/null +[ $? = 1 ] || test_failed $LINENO + + +printf "\ntesting zupdate-%s..." "$2" + +"${ZUPDATE}" -N "" || test_failed $LINENO +cat in.bz2 > a.bz2 || framework_failure +cat in.gz > a.gz || framework_failure +"${ZUPDATE}" -Nq --bz2=bad_command a.bz2 +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -N --bz2='bzip2 --bad-option' a.bz2 > /dev/null 2>&1 +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -Nq --gz=bad_command a.gz +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -N --gz='gzip --bad-option' a.gz 2> /dev/null +[ $? 
= 1 ] || test_failed $LINENO +"${ZUPDATE}" -Nq --lz=bad_command a.gz +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -N --lz='lzip --bad-option' a.gz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -N --bad-option 2> /dev/null +[ $? = 1 ] || test_failed $LINENO + +cat in.lz in.lz > a.lz || framework_failure +"${ZUPDATE}" -Nq -f a.bz2 a.gz +[ $? = 1 ] || test_failed $LINENO +[ -e a.bz2 ] || test_failed $LINENO +[ -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +"${ZUPDATE}" -N a.bz2 || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure +"${ZUPDATE}" -N a.gz || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ ! -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +cat in.gz > a.gz || framework_failure +"${ZUPDATE}" -Nq a.bz2 a.gz +[ $? = 1 ] || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +cat in.gz > a.gz || framework_failure +"${ZUPDATE}" -N -f -k a.bz2 a.gz || test_failed $LINENO +[ -e a.bz2 ] || test_failed $LINENO +[ -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +cat in.gz > a.gz || framework_failure +"${ZUPDATE}" -N -f a.bz2 a.gz || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ ! -e a.gz ] || test_failed $LINENO +[ ! 
-e a ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.bz2 > a.tbz || framework_failure # keep combined extensions +cat in.bz2 > b.tbz2 || framework_failure +cat in.gz > c.tgz || framework_failure +"${ZUPDATE}" -N a.tbz b.tbz2 c.tgz || test_failed $LINENO +[ ! -e a.tbz ] || test_failed $LINENO +[ ! -e b.tbz2 ] || test_failed $LINENO +[ ! -e c.tgz ] || test_failed $LINENO +[ ! -e a ] || test_failed $LINENO +[ ! -e b ] || test_failed $LINENO +[ ! -e c ] || test_failed $LINENO +[ ! -e a.lz ] || test_failed $LINENO +[ ! -e b.lz ] || test_failed $LINENO +[ ! -e c.lz ] || test_failed $LINENO +[ -e a.tlz ] || test_failed $LINENO +[ -e b.tlz ] || test_failed $LINENO +[ -e c.tlz ] || test_failed $LINENO +rm -f a.tlz b.tlz c.tlz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +cat "${bad0_gz}" > b.gz || framework_failure +cat in.gz > c.gz || framework_failure +"${ZUPDATE}" -N -f a.bz2 b.gz c.gz 2> /dev/null && test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ -e b.gz ] || test_failed $LINENO +[ -e c.gz ] || test_failed $LINENO +[ ! -e a ] || test_failed $LINENO +[ ! -e b ] || test_failed $LINENO +[ ! -e c ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz b.gz c.gz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +"${ZUPDATE}" -N -1 -q a.bz2 || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.gz > 'name with spaces.gz' || framework_failure +"${ZUPDATE}" -N -1 -q 'name with spaces.gz' || test_failed $LINENO +[ ! 
-e 'name with spaces.gz' ] || test_failed $LINENO +[ -e 'name with spaces.lz' ] || test_failed $LINENO +"${ZCMP}" -N in 'name with spaces.lz' || test_failed $LINENO +rm -f 'name with spaces.lz' || framework_failure + +mkdir tmp2 +mkdir tmp2/tmp3 +cat in.bz2 > tmp2/tmp3/a.bz2 || framework_failure +cat in.gz > tmp2/tmp3/a.gz || framework_failure +"${ZUPDATE}" -N -r --format=gz tmp2 || test_failed $LINENO +[ -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ ! -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e tmp2/tmp3/a.lz ] || test_failed $LINENO +rm -f tmp2/tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -r --format=bz2 tmp2 || test_failed $LINENO +[ ! -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ ! -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e tmp2/tmp3/a.lz ] || test_failed $LINENO +rm -f tmp2/tmp3/a.lz || framework_failure +cat in.bz2 > tmp2/tmp3/a.bz2 || framework_failure +cat in.gz > tmp2/tmp3/a.gz || framework_failure +cd tmp2 || framework_failure +"${ZUPDATE}" -N -r -k -f . || test_failed $LINENO +[ -e tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +rm -f tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -r -k -f || test_failed $LINENO +[ -e tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +rm -f tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -R -k -f . || test_failed $LINENO +[ -e tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +rm -f tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -R -k -f || test_failed $LINENO +[ -e tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +rm -f tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -r -f . || test_failed $LINENO +[ ! -e tmp3/a.bz2 ] || test_failed $LINENO +[ ! -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +cd .. 
|| framework_failure +rm -r tmp2 || framework_failure + +rm -f empty empty.bz2 empty.gz empty.lz || framework_failure +if ln -s '.' slink 2> /dev/null ; then + "${ZCAT}" -N -r slink > /dev/null || test_failed $LINENO + "${ZGREP}" -N -r "GNU" slink > /dev/null || test_failed $LINENO + "${ZTEST}" -N -r slink || test_failed $LINENO + "${ZUPDATE}" -N -r -f slink || test_failed $LINENO +else + printf "\nwarning: skipping link test: 'ln' does not work on your system." +fi +rm -f slink || framework_failure + +echo +if [ ${fail} = 0 ] ; then + echo "tests completed successfully." + cd "${objdir}" && rm -r tmp +else + echo "tests failed." +fi +exit ${fail} diff --git a/testsuite/test.txt b/testsuite/test.txt new file mode 100644 index 0000000..9196a3a --- /dev/null +++ b/testsuite/test.txt @@ -0,0 +1,676 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) <year> <name of author> + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. 
If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. + GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) <year> <name of author>
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/testsuite/test.txt.tar b/testsuite/test.txt.tar Binary files differnew file mode 100644 index 0000000..92d6f1b --- /dev/null +++ b/testsuite/test.txt.tar diff --git a/testsuite/test_bad_crc.lz b/testsuite/test_bad_crc.lz Binary files differnew file mode 100644 index 0000000..c7d5bc9 --- /dev/null +++ b/testsuite/test_bad_crc.lz diff --git a/testsuite/zcat_vs.dat b/testsuite/zcat_vs.dat new file mode 100644 index 0000000..29978fd --- /dev/null +++ b/testsuite/zcat_vs.dat @@ -0,0 +1,68 @@ +Worst case test file for zcat -vs. +First 4096 input bytes produce 4095 output bytes because of -s. +Next 4096 input bytes produce 16384 output bytes, accumulating a total +of 20479 bytes in the output buffer€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€
€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€
€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€
\ No newline at end of file diff --git a/testsuite/zero_bad_crc.gz b/testsuite/zero_bad_crc.gz Binary files differ new file mode 100644 index 0000000..a2a9991 --- /dev/null +++ b/testsuite/zero_bad_crc.gz diff --git a/testsuite/zero_bad_crc.lz b/testsuite/zero_bad_crc.lz Binary files differ new file mode 100644 index 0000000..0d3cc93 --- /dev/null +++ b/testsuite/zero_bad_crc.lz @@ -0,0 +1,386 @@ +/* Zcat - decompress and concatenate files to standard output + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined(__MSVCRT__) || defined(__OS2__) +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + + +namespace { + +#include "recursive.cc" +#include "zcatgrep.cc" + +struct Cat_options + { + int number_lines; // 0 = no, 1 = nonblank, 2 = all + bool show_ends; + bool show_nonprinting; + bool show_tabs; + bool squeeze_blank; + + Cat_options() + : number_lines( 0 ), show_ends( false ), show_nonprinting( false ), + show_tabs( false ), squeeze_blank( false ) {} + }; + + +class Line_number // unlimited size line counter + { + std::string str; + unsigned first_digit_pos; + +public: + Line_number() : str( " 0\t" ), first_digit_pos( 5 ) {} + + void next() + { + for( unsigned i = str.size() - 1; i > first_digit_pos; ) + { + if( str[--i] < '9' ) { ++str[i]; return; } + str[i] = '0'; + } + if( first_digit_pos > 0 ) str[--first_digit_pos] = '1'; + else str.insert( str.begin() + first_digit_pos, '1' ); + } + + int sprint( uint8_t * const buf ) + { + std::memcpy( buf, str.c_str(), str.size() ); + return str.size(); + } + }; + +Line_number line_number; + + +void show_help() + { + std::printf( "zcat copies each file argument to standard output in sequence. If any\n" + "file given is compressed, its decompressed content is copied. If a file\n" + "given does not exist, and its name does not end with one of the known\n" + "extensions, zcat tries the compressed file names corresponding to the\n" + "formats supported. If a file fails to decompress, zcat continues copying the\n" + "rest of the files.\n" + "\nIf a file is specified as '-', data are read from standard input,\n" + "decompressed if needed, and sent to standard output. 
Data read from\n" + "standard input must be of the same type; all uncompressed or all in the\n" + "same compressed format.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches read standard input.\n" + "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nUsage: zcat [options] [files]\n" + "\nExit status is 0 if no errors occurred, 1 otherwise.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -A, --show-all equivalent to '-vET'\n" + " -b, --number-nonblank number nonblank output lines\n" + " -e equivalent to '-vE'\n" + " -E, --show-ends display '$' at end of each line\n" + " -M, --format=<list> process only the formats in <list>\n" + " -n, --number number all output lines\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=<fmt> force the format given (bz2, gz, lz, xz)\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -s, --squeeze-blank never more than one single blank line\n" + " -t equivalent to '-vT'\n" + " -T, --show-tabs display TAB characters as '^I'\n" + " -v, --show-nonprinting use '^' and 'M-' notation, except for LF and TAB\n" + " --verbose verbose mode (show error messages)\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" ); + show_help_addr(); + } + + +bool do_cat( const int infd, const int buffer_size, + uint8_t * const inbuf, uint8_t * const outbuf, + const std::string & input_filename, + const Cat_options & cat_options ) + { + static int at_bol = 1; // at begin of line. 
0 = false, 1 = true, + // 2 = at begin of second blank line. + int inpos = 0; // positions in buffers + int outpos = 0; + int rd = -1; // bytes read by the last readblock + unsigned char c; + + while( true ) + { + do { + if( outpos >= buffer_size ) + { + if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos ) + { show_error( "Write error", errno ); return false; } + outpos = 0; + } + if( inpos > rd ) // inbuf is empty + { + rd = readblock( infd, inbuf, buffer_size ); + if( rd != buffer_size && errno ) + { + show_file_error( input_filename.c_str(), "Read error", errno ); + return false; + } + if( rd == 0 ) + { + if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos ) + { show_error( "Write error", errno ); return false; } + outpos = 0; + return true; + } + inpos = 0; + inbuf[rd] = '\n'; // sentinel newline + } + else // a real newline was found + { + if( at_bol > 1 ) + { + if( cat_options.squeeze_blank ) { c = inbuf[inpos++]; continue; } + } + else ++at_bol; + if( at_bol > 1 && cat_options.number_lines == 2 ) + { + line_number.next(); + outpos += line_number.sprint( &outbuf[outpos] ); + } + if( cat_options.show_ends ) outbuf[outpos++] = '$'; + outbuf[outpos++] = '\n'; // output the newline + } + c = inbuf[inpos++]; + } + while( c == '\n' ); + + if( at_bol > 0 && cat_options.number_lines ) + { + line_number.next(); + outpos += line_number.sprint( &outbuf[outpos] ); + } + at_bol = 0; + + // the loops below continue until a newline (real or sentinel) is found + + if( cat_options.show_nonprinting ) + while( true ) + { + if( c < 32 || c >= 127 ) + { + if( c == '\n' ) break; + if( c != '\t' || cat_options.show_tabs ) + { + if( c >= 128 ) + { c -= 128; outbuf[outpos++] = 'M'; outbuf[outpos++] = '-'; } + if( c < 32 ) { c += 64; outbuf[outpos++] = '^'; } + else if( c == 127 ) { c = '?'; outbuf[outpos++] = '^'; } + } + } + outbuf[outpos++] = c; + c = inbuf[inpos++]; + } + else // not quoting + while( c != '\n' ) + { + if( c == '\t' && cat_options.show_tabs ) + { c += 
64; outbuf[outpos++] = '^'; } + outbuf[outpos++] = c; + c = inbuf[inpos++]; + } + } + } + + +bool cat( int infd, const int format_index, const std::string & input_filename, + const Cat_options & cat_options ) + { + enum { buffer_size = 4096, outbuf_size = (5 * buffer_size) + 256 + 1 }; + // input buffer with space for sentinel newline at the end + uint8_t * const inbuf = new uint8_t[buffer_size+1]; + // output buffer with space for character quoting, 255-digit line number, + // worst case flushing respect to inbuf, and a canary byte. + uint8_t * const outbuf = new uint8_t[outbuf_size]; + outbuf[outbuf_size-1] = 0; + Children children; + bool error = false; + + if( !set_data_feeder( input_filename, &infd, children, format_index ) || + !do_cat( infd, buffer_size, inbuf, outbuf, input_filename, cat_options ) ) + error = true; + if( !good_status( children, !error ) ) error = true; + if( !error && close( infd ) != 0 ) { show_close_error(); error = true; } + if( outbuf[outbuf_size-1] != 0 ) internal_error( "buffer overflow." ); + delete[] outbuf; delete[] inbuf; + return !error; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { verbose_opt = 256, bz2_opt, gz_opt, lz_opt, xz_opt }; + int format_index = -1; + int recursive = 0; // 1 = '-r', 2 = '-R' + std::list< std::string > filenames; + Cat_options cat_options; + program_name = "zcat"; + invocation_name = ( argc > 0 ) ? 
argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'A', "show-all", Arg_parser::no }, // cat + { 'b', "number-nonblank", Arg_parser::no }, // cat + { 'c', "stdout", Arg_parser::no }, // gzip + { 'd', "decompress", Arg_parser::no }, // gzip + { 'e', 0, Arg_parser::no }, // cat + { 'E', "show-ends", Arg_parser::no }, // cat + { 'f', "force", Arg_parser::no }, // gzip + { 'h', "help", Arg_parser::no }, + { 'l', "list", Arg_parser::no }, // gzip + { 'L', "license", Arg_parser::no }, // gzip + { 'M', "format", Arg_parser::yes }, + { 'n', "number", Arg_parser::no }, // cat + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 's', "squeeze-blank", Arg_parser::no }, // cat + { 't', 0, Arg_parser::no }, // cat + { 'T', "show-tabs", Arg_parser::no }, // cat + { 'v', "show-nonprinting", Arg_parser::no }, // cat + { 'V', "version", Arg_parser::no }, + { verbose_opt, "verbose", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'A': cat_options.show_ends = true; + cat_options.show_nonprinting = true; + cat_options.show_tabs = true; break; + case 'b': cat_options.number_lines = 1; break; + case 'c': break; + case 'd': break; + case 'e': cat_options.show_nonprinting = true; // fall through + case 'E': cat_options.show_ends 
= true; break; + case 'f': break; + case 'h': show_help(); return 0; + case 'l': break; + case 'L': break; + case 'M': parse_format_list( arg ); break; + case 'n': if( cat_options.number_lines == 0 ) + { cat_options.number_lines = 2; } break; + case 'N': break; + case 'O': format_index = parse_format_type( arg ); break; + case 'q': verbosity = -1; break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 's': cat_options.squeeze_blank = true; break; + case 't': cat_options.show_nonprinting = true; // fall through + case 'T': cat_options.show_tabs = true; break; + case 'v': cat_options.show_nonprinting = true; break; + case 'V': show_version(); return 0; + case verbose_opt: if( verbosity < 4 ) ++verbosity; break; + case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break; + default : internal_error( "uncaught option." ); + } + } // end process options + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" ); + + std::string input_filename; + bool error = false; + bool stdin_used = false; + while( next_filename( filenames, input_filename, error, recursive ) ) + { + int infd; + if( input_filename == "." 
) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; input_filename = "-"; + } + else + { + infd = open_instream( input_filename, format_index < 0 ); + if( infd < 0 ) { error = true; continue; } + } + + if( !cat( infd, format_index, input_filename, cat_options ) ) error = true; + + if( close( infd ) != 0 ) + { show_file_error( input_filename.c_str(), "Error closing input file", + errno ); error = true; } + } + + if( std::fclose( stdout ) != 0 ) + { + show_error( "Error closing stdout", errno ); + error = true; + } + return error; + } diff --git a/zcatgrep.cc b/zcatgrep.cc new file mode 100644 index 0000000..31d54e6 --- /dev/null +++ b/zcatgrep.cc @@ -0,0 +1,59 @@ +/* Common code for zcat and zgrep + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +int simple_extension_index( const std::string & name ) + { + for( int i = 0; i < num_formats; ++i ) + { + const std::string ext( simple_extensions[i] ); + if( name.size() > ext.size() && + name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 ) + return i; + } + return -1; + } + + +int open_instream( std::string & input_filename, const bool search, + const bool no_messages = false ) + { + int infd = open( input_filename.c_str(), O_RDONLY | O_BINARY ); + if( infd < 0 ) + { + const int saved_errno = errno; + if( search && simple_extension_index( input_filename ) < 0 ) + { + for( int i = 0; i < num_formats; ++i ) + if( enabled_format( format_order[i] ) ) + { + const std::string name( input_filename + + simple_extensions[format_order[i]] ); + infd = open( name.c_str(), O_RDONLY | O_BINARY ); + if( infd >= 0 ) { input_filename = name; break; } + } + } + if( infd < 0 && !no_messages ) + show_file_error( input_filename.c_str(), "Can't open input file", + saved_errno ); + } + return infd; + } @@ -0,0 +1,471 @@ +/* Zcmp - decompress and compare two files byte by byte + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cctype> +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined(__MSVCRT__) || defined(__OS2__) +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + +#ifndef LLONG_MAX +#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL +#endif + + +namespace { + +#include "zcmpdiff.cc" + +void show_help() + { + std::printf( "zcmp compares two files and, if they differ, writes to standard output the\n" + "first byte and line number where they differ. Bytes and lines are numbered\n" + "starting with 1. A hyphen '-' used as a file argument means standard input.\n" + "If any file given is compressed, its decompressed content is used. Compressed\n" + "files are decompressed on the fly; no temporary files are created.\n" + "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nUsage: zcmp [options] file1 [file2]\n" + "\nzcmp compares file1 to file2. The standard input is used only if file1 or\n" + "file2 refers to standard input. 
If file2 is omitted zcmp tries the\n" + "following:\n" + "\n - If file1 is compressed, compares its decompressed contents with\n" + " the corresponding uncompressed file (the name of file1 with the\n" + " extension removed).\n" + "\n - If file1 is uncompressed, compares it with the decompressed\n" + " contents of file1.[lz|bz2|gz|xz] (the first one that is found).\n" + "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -b, --print-bytes print differing bytes\n" + " -i, --ignore-initial=<n>[:<n2>] ignore differences in the first <n> bytes\n" + " -l, --list list position, value of all differing bytes\n" + " -M, --format=<list> process only the formats in <list>\n" + " -n, --bytes=<n> compare at most <n> bytes\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=[<f1>][,<f2>] force the formats given (bz2, gz, lz, xz)\n" + " -q, --quiet suppress all messages\n" + " -s, --silent (same as --quiet)\n" + " -v, --verbose verbose mode (same as --list)\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" + "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); + show_help_addr(); + } + + +long long getnum( const char * const ptr, const char ** const tailp = 0, + const long long llimit = 0, + const long long ulimit = LLONG_MAX ) + { + char * tail; + errno = 0; + long long result = strtoll( ptr, &tail, 0 ); + if( tail == ptr ) + { + show_error( "Bad or missing numerical argument.", 0, true ); + std::exit( 2 ); + } + if( result < 0 ) errno = ERANGE; + + if( !errno && tail[0] && std::isalpha( 
tail[0] ) ) + { + const unsigned char ch = *tail++; + int factor; + bool bsuf; // 'B' suffix is present + if( tail[0] == 'i' ) { ++tail; factor = 1024; } else factor = 1000; + if( tail[0] == 'B' ) { ++tail; bsuf = true; } else bsuf = false; + int exponent = -1; // -1 = bad multiplier + switch( ch ) + { + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; + case 'B': if( factor == 1000 && !bsuf ) exponent = 0; break; + } + if( exponent < 0 ) + { + show_error( "Bad multiplier in numerical argument.", 0, true ); + std::exit( 2 ); + } + for( int i = 0; i < exponent; ++i ) + { + if( ulimit / factor >= result ) result *= factor; + else { errno = ERANGE; break; } + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + show_error( "Numerical argument out of limits." 
); + std::exit( 2 ); + } + if( tailp ) *tailp = tail; + return result; + } + + +void parse_ignore_initial( const char * const arg, long long ignore_initial[2] ) + { + const char * tail; + ignore_initial[0] = getnum( arg, &tail ); + if( *tail == ':' || *tail == ',' ) + ignore_initial[1] = getnum( ++tail ); + else if( *tail == 0 ) ignore_initial[1] = ignore_initial[0]; + else + { + show_error( "Bad separator in argument of '--ignore-initial'", 0, true ); + std::exit( 2 ); + } + } + + +bool skip_ignore_initial( const long long ignore_initial, const int infd ) + { + if( ignore_initial > 0 ) + { + enum { buffer_size = 4096 }; + long long rest = ignore_initial; + uint8_t buffer[buffer_size]; + while( rest > 0 ) + { + const int size = std::min( rest, (long long)buffer_size ); + const int rd = readblock( infd, buffer, size ); + if( rd != size && errno ) return false; + if( rd < size ) break; + rest -= rd; + } + } + return true; + } + + +// Put into buf the unsigned char c, making unprintable bytes +// visible by quoting like cat -t does. 
+void sprintc( char * const buf, unsigned char c ) + { + int i = 0; + + if( c < 32 || c >= 127 ) + { + if( c >= 128 ) { c -= 128; buf[i++] = 'M'; buf[i++] = '-'; } + if( c < 32 ) { c += 64; buf[i++] = '^'; } + else if( c == 127 ) { c = '?'; buf[i++] = '^'; } + } + buf[i++] = c; + buf[i++] = 0; + } + + +int block_compare( const uint8_t * const buffer0, + const uint8_t * const buffer1, + unsigned long long * const line_numberp ) + { + const uint8_t * p0 = buffer0; + const uint8_t * p1 = buffer1; + + if( verbosity == 0 ) + { + int nl_count = 0; + while( *p0 == *p1 ) + { if( *p0 == '\n' ) { ++nl_count; } ++p0; ++p1; } + *line_numberp += nl_count; + } + else while( *p0 == *p1 ) { ++p0; ++p1; } + return p0 - buffer0; + } + + +int cmp( const long long max_size, const int infd[2], + const std::string filenames[2], const bool print_bytes ) + { + const int buffer_size = 4096; + unsigned long long byte_number = 1; + unsigned long long line_number = 1; + // remaining number of bytes to compare + long long rest = ( max_size >= 0 ) ? 
max_size : buffer_size; + // buffers with space for sentinels at the end + uint8_t * const buffer0 = new uint8_t[2*(buffer_size+1)]; + uint8_t * const buffer1 = buffer0 + buffer_size + 1; + uint8_t * buffer[2]; + buffer[0] = buffer0; buffer[1] = buffer1; + int different = 0; + + while( rest > 0 ) + { + const int size = std::min( (long long)buffer_size, rest ); + if( max_size >= 0 ) rest -= size; + int rd[2]; // number of bytes read from each file + for( int i = 0; i < 2; ++i ) + { + rd[i] = readblock( infd[i], buffer[i], size ); + if( rd[i] != size && errno ) + { + show_file_error( filenames[i].c_str(), "Read error", errno ); + return 2; + } + } + + const int min_rd = std::min( rd[0], rd[1] ); + buffer0[min_rd] = 0; // sentinels for the block compare + buffer1[min_rd] = 1; + + int first_diff = block_compare( buffer0, buffer1, &line_number ); + byte_number += first_diff; + + if( first_diff < min_rd ) + { + if( verbosity < 0 ) return 1; // return status only + if( verbosity == 0 ) // show first difference + { + if( !print_bytes ) + std::printf( "%s %s differ: byte %llu, line %llu\n", + filenames[0].c_str(), filenames[1].c_str(), + byte_number, line_number ); + else + { + const unsigned char c0 = buffer0[first_diff]; + const unsigned char c1 = buffer1[first_diff]; + char buf0[5], buf1[5]; + sprintc( buf0, c0 ); sprintc( buf1, c1 ); + std::printf( "%s %s differ: byte %llu, line %llu is %3o %s %3o %s\n", + filenames[0].c_str(), filenames[1].c_str(), + byte_number, line_number, c0, buf0, c1, buf1 ); + } + std::fflush( stdout ); + return 1; + } + else // verbosity > 0 ; show all differences + { + different = 1; + for( ; first_diff < min_rd; ++byte_number, ++first_diff ) + { + const unsigned char c0 = buffer0[first_diff]; + const unsigned char c1 = buffer1[first_diff]; + if( c0 != c1 ) + { + if( !print_bytes ) + std::printf( "%llu %3o %3o\n", byte_number, c0, c1 ); + else + { + char buf0[5], buf1[5]; + sprintc( buf0, c0 ); sprintc( buf1, c1 ); + std::printf( "%llu %3o %-4s 
%3o %s\n", + byte_number, c0, buf0, c1, buf1 ); + } + } + } + std::fflush( stdout ); + } + } + + if( rd[0] != rd[1] ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: EOF on %s\n", + program_name, filenames[rd[1]<rd[0]].c_str() ); + return 1; + } + if( min_rd != buffer_size ) break; + } + + delete[] buffer0; + return different; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + // number of initial bytes ignored for each file + long long ignore_initial[2] = { 0, 0 }; + long long max_size = -1; // < 0 means unlimited size + int format_types[2] = { -1, -1 }; + bool print_bytes = false; + program_name = "zcmp"; + invocation_name = ( argc > 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'b', "print-bytes", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'i', "ignore-initial", Arg_parser::yes }, + { 'l', "list", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'n', "bytes", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 's', "silent", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'b': print_bytes = true; break; + case 'h': show_help(); return 0; + case 'i': 
parse_ignore_initial( arg.c_str(), ignore_initial ); break; + case 'l': verbosity = 1; break; + case 'M': parse_format_list( arg ); break; + case 'n': max_size = getnum( arg.c_str() ); break; + case 'N': break; + case 'O': parse_format_types2( arg, format_types ); break; + case 'q': + case 's': verbosity = -1; break; + case 'v': verbosity = 1; break; + case 'V': show_version(); return 0; + case bz2_opt: parse_compressor( arg, fmt_bz2 ); break; + case gz_opt: parse_compressor( arg, fmt_gz ); break; + case lz_opt: parse_compressor( arg, fmt_lz ); break; + case xz_opt: parse_compressor( arg, fmt_xz ); break; + default : internal_error( "uncaught option." ); + } + } // end process options + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + if( argind >= parser.arguments() ) + { show_error( "No files given.", 0, true ); return 2; } + if( argind + 2 < parser.arguments() ) + { show_error( "Too many files.", 0, true ); return 2; } + + const int files = parser.arguments() - argind; + std::string filenames[2]; // file names of the two input files + filenames[0] = parser.argument( argind ); + if( files == 2 ) filenames[1] = parser.argument( argind + 1 ); + + int infd[2]; // file descriptors of the two files + infd[0] = ( filenames[0] == "-" ) ? + STDIN_FILENO : open_instream( filenames[0] ); + if( infd[0] < 0 ) return 2; + + if( files == 2 ) + { + if( check_identical( filenames[0].c_str(), filenames[1].c_str() ) ) + { + if( ignore_initial[0] == ignore_initial[1] ) return 0; + else { show_error( "Can't compare parts of same file." ); return 2; } + } + infd[1] = ( filenames[1] == "-" ) ? 
+ STDIN_FILENO : open_instream( filenames[1] ); + if( infd[1] < 0 ) return 2; + } + else + { + if( filenames[0] == "-" ) + { show_error( "Missing operand after '-'.", 0, true ); return 2; } + if( format_types[0] >= 0 || format_types[1] >= 0 ) + { show_error( "Two files must be given when format is specified.", 0, true ); + return 2; } + filenames[1] = filenames[0]; + infd[1] = open_other_instream( filenames[1] ); + if( infd[1] < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't find file to compare with '%s'.\n", + program_name, filenames[0].c_str() ); + show_error( 0, 0, true ); return 2; + } + } + + int old_infd[2]; // copy of file descriptors of the two files + old_infd[0] = infd[0]; old_infd[1] = infd[1]; + Children children[2]; + if( !set_data_feeder( filenames[0], &infd[0], children[0], format_types[0] ) || + !set_data_feeder( filenames[1], &infd[1], children[1], format_types[1] ) ) + return 2; + + for( int i = 0; i < 2; ++i ) + if( !skip_ignore_initial( ignore_initial[i], infd[i] ) ) + { + show_file_error( filenames[i].c_str(), + "Read error skipping initial bytes", errno ); + return 2; + } + + int retval = cmp( max_size, infd, filenames, print_bytes ); + + for( int i = 0; i < 2; ++i ) + if( !good_status( children[i], retval == 0 && max_size < 0 ) ) retval = 2; + + for( int i = 0; i < 2; ++i ) + { + if( close( infd[i] ) != 0 ) + { show_close_error(); retval = 2; } + if( filenames[i] != "-" && close( old_infd[i] ) != 0 ) + { + show_file_error( filenames[i].c_str(), "Error closing input file", errno ); + retval = 2; + } + } + if( std::fclose( stdout ) != 0 ) + { + show_error( "Error closing stdout", errno ); + retval = 2; + } + + return retval; + } diff --git a/zcmpdiff.cc b/zcmpdiff.cc new file mode 100644 index 0000000..fceb8cf --- /dev/null +++ b/zcmpdiff.cc @@ -0,0 +1,70 @@ +/* Common code for zcmp and zdiff + Copyright (C) 2010-2021 Antonio Diaz Diaz. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +int open_instream( const std::string & input_filename ) + { + const int infd = open( input_filename.c_str(), O_RDONLY | O_BINARY ); + if( infd < 0 ) + show_file_error( input_filename.c_str(), "Can't open input file", errno ); + return infd; + } + + +int open_other_instream( std::string & name ) + { + const int eindex = extension_index( name ); + if( eindex >= 0 && enabled_format( -1 ) ) + { // open uncompressed version + name.resize( name.size() - std::strlen( extension_from( eindex ) ) ); + name += extension_to( eindex ); + return open( name.c_str(), O_RDONLY | O_BINARY ); + } + if( eindex < 0 ) // search compressed version + for( int i = 0; i < num_formats; ++i ) + if( enabled_format( format_order[i] ) ) + { + const std::string s( name + simple_extensions[format_order[i]] ); + const int infd = open( s.c_str(), O_RDONLY | O_BINARY ); + if( infd >= 0 ) { name = s; return infd; } + } + return -1; + } + + +void parse_format_types2( const std::string & arg, int format_types[2] ) + { + const unsigned i = std::min( arg.find( ',' ), arg.size() ); + if( i > 0 ) format_types[0] = parse_format_type( arg.substr( 0, i ) ); + else format_types[0] = -1; + if( i + 1 < arg.size() ) format_types[1] = + parse_format_type( arg.substr( i + 1 ) ); + else format_types[1] = -1; + } + + +bool check_identical( 
const char * const name1, const char * const name2 ) + { + if( std::strcmp( name1, name2 ) == 0 ) return true; + struct stat stat1, stat2; + if( stat( name1, &stat1 ) || stat( name2, &stat2 ) ) return false; + return ( stat1.st_ino == stat2.st_ino && stat1.st_dev == stat2.st_dev ); + } diff --git a/zdiff.cc b/zdiff.cc new file mode 100644 index 0000000..a173971 --- /dev/null +++ b/zdiff.cc @@ -0,0 +1,440 @@ +/* Zdiff - decompress and compare two files line by line + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cctype> +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined(__MSVCRT__) || defined(__OS2__) +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + +// 'verbosity' is always 0 in zdiff; no --verbose or --quiet available. + +namespace { + +std::string fifonames[2]; // names of the two fifos passed to diff + +#include "zcmpdiff.cc" + +void show_help() + { + std::printf( "zdiff compares two files and, if they differ, writes to standard output the\n" + "differences line by line. 
A hyphen '-' used as a file argument means standard\n" + "input. If any file given is compressed, its decompressed content is used.\n" + "zdiff is a front end to the program diff and has the limitation that messages\n" + "from diff refer to temporary file names instead of those specified.\n" + "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nUsage: zdiff [options] file1 [file2]\n" + "\nzdiff compares file1 to file2. The standard input is used only if file1 or\n" + "file2 refers to standard input. If file2 is omitted zdiff tries the\n" + "following:\n" + "\n - If file1 is compressed, compares its decompressed contents with\n" + " the corresponding uncompressed file (the name of file1 with the\n" + " extension removed).\n" + "\n - If file1 is uncompressed, compares it with the decompressed\n" + " contents of file1.[lz|bz2|gz|xz] (the first one that is found).\n" + "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" + "Some options only work if the diff program used supports them.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --text treat all files as text\n" + " -b, --ignore-space-change ignore changes in the amount of white space\n" + " -B, --ignore-blank-lines ignore changes whose lines are all blank\n" + " -c use the context output format\n" + " -C, --context=<n> same as -c but use <n> lines of context\n" + " -d, --minimal try hard to find a smaller set of changes\n" + " -E, --ignore-tab-expansion ignore changes due to tab expansion\n" + " -i, --ignore-case ignore case differences in file contents\n" + " -M, --format=<list> process only the formats in <list>\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=[<f1>][,<f2>] force the formats given (bz2, gz, lz, xz)\n" + " -p, --show-c-function show which C function each change is in\n" + " -q, --brief output only whether files differ\n" + " -s, 
--report-identical-files report when two files are identical\n" + " -t, --expand-tabs expand tabs to spaces in output\n" + " -T, --initial-tab make tabs line up by prepending a tab\n" + " -u use the unified output format\n" + " -U, --unified=<n> same as -u but use <n> lines of context\n" + " -w, --ignore-all-space ignore all white space\n" + " -W, --width=<n> output at most <n> print columns\n" + " -y, --side-by-side output in two columns\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" ); + show_help_addr(); + } + + +const char * my_basename( const char * filename ) + { + const char * c = filename; + while( *c ) { if( *c == '/' ) { filename = c + 1; } ++c; } + return filename; + } + + +extern "C" void remove_fifos() + { + if( fifonames[0].size() ) + { std::remove( fifonames[0].c_str() ); fifonames[0].clear(); } + if( fifonames[1].size() ) + { std::remove( fifonames[1].c_str() ); fifonames[1].clear(); } + } + + +/* Set fifonames[i] to "${TMPDIR}/<coded_pid>[_-]<basename(filenames[i])>" + and create FIFOs. The pid is coded in little endian order. 
+*/ +bool set_fifonames( const std::string filenames[2] ) + { + enum { num_codes = 36 }; + const char * const codes = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char * p = std::getenv( "TMPDIR" ); + + if( p ) { fifonames[0] = p; fifonames[0] += '/'; } + else fifonames[0] = "/tmp/"; + int n = getpid(); + do fifonames[0] += codes[n % num_codes]; while( n /= num_codes ); + const unsigned pos = fifonames[0].size(); + fifonames[0] += '_'; + fifonames[1] = fifonames[0]; + fifonames[0] += my_basename( filenames[0].c_str() ); + fifonames[1] += my_basename( filenames[1].c_str() ); + if( fifonames[1] == fifonames[0] ) fifonames[1][pos] = '-'; + + for( int i = 0; i < 2; ++i ) + if( mkfifo( fifonames[i].c_str(), S_IRUSR | S_IWUSR ) != 0 ) + { + if( errno == EEXIST ) + { + std::remove( fifonames[i].c_str() ); + if( mkfifo( fifonames[i].c_str(), S_IRUSR | S_IWUSR ) == 0 ) + continue; + } + show_file_error( fifonames[i].c_str(), "Can't create FIFO", errno ); + return false; + } + return true; + } + + +bool set_data_feeder( const std::string & filename, + const std::string & fifoname, const int infd, + Children & children, int format_index ) + { + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + if( format_index < 0 ) + format_index = test_format( infd, magic_data, &magic_size ); + children.compressor_name = get_compressor_name( format_index ); + + if( children.compressor_name ) // compressed + { + int fda[2]; // pipe from feeder to compressor + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const pid_t pid = fork(); + if( pid == 0 ) // child 1 (compressor feeder) + { + if( close( fda[0] ) != 0 || + !feed_data( filename, infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) + { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child 2 (compressor) + { + const int 
outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY ); + if( outfd < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't open FIFO '%s' for writing: %s\n", + program_name, fifoname.c_str(), std::strerror( errno ) ); + _exit( 2 ); + } + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + dup2( outfd, STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 && + close( outfd ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+3]; + argv[0] = children.compressor_name; + for( int i = 0; i < size; ++i ) + argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = ( verbosity >= 0 ) ? "-d" : "-dq"; + argv[size+2] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( children.compressor_name ); + _exit( 2 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( children.compressor_name ); return false; } + + close( fda[0] ); close( fda[1] ); + children.pid[0] = pid; + children.pid[1] = pid2; + } + else // uncompressed + { + const pid_t pid = fork(); + if( pid == 0 ) // child (feeder) + { + const int outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY ); + if( outfd < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't open FIFO '%s' for writing: %s\n", + program_name, fifoname.c_str(), std::strerror( errno ) ); + _exit( 2 ); + } + if( !feed_data( filename, infd, outfd, magic_data, magic_size ) ) + _exit( 2 ); + if( close( outfd ) != 0 ) + { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + children.pid[0] = pid; + children.pid[1] = 0; + } + return true; + } + + +extern "C" void signal_handler( int sig ) + { + remove_fifos(); + std::signal( sig, SIG_DFL ); + std::raise( sig ); + } + + +void set_signals() + { + std::signal( SIGHUP, signal_handler ); + std::signal( SIGINT, signal_handler ); + std::signal( 
SIGTERM, signal_handler ); + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + std::vector< const char * > diff_args; // args to diff, maybe empty + int format_types[2] = { -1, -1 }; + program_name = "zdiff"; + invocation_name = ( argc > 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'a', "text", Arg_parser::no }, + { 'b', "ignore-space-change", Arg_parser::no }, + { 'B', "ignore-blank-lines", Arg_parser::no }, + { 'c', 0, Arg_parser::no }, + { 'C', "context", Arg_parser::yes }, + { 'd', "minimal", Arg_parser::no }, + { 'E', "ignore-tab-expansion", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'i', "ignore-case", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'p', "show-c-function", Arg_parser::no }, + { 'q', "brief", Arg_parser::no }, + { 's', "report-identical-files", Arg_parser::no }, + { 't', "expand-tabs", Arg_parser::no }, + { 'T', "initial-tab", Arg_parser::no }, + { 'u', 0, Arg_parser::no }, + { 'U', "unified", Arg_parser::yes }, + { 'V', "version", Arg_parser::no }, + { 'w', "ignore-all-space", Arg_parser::no }, + { 'W', "width", Arg_parser::yes }, + { 'y', "side-by-side", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'a': diff_args.push_back( "-a" ); 
break; + case 'b': diff_args.push_back( "-b" ); break; + case 'B': diff_args.push_back( "-B" ); break; + case 'c': diff_args.push_back( "-c" ); break; + case 'C': diff_args.push_back( "-C" ); + diff_args.push_back( arg.c_str() ); break; + case 'd': diff_args.push_back( "-d" ); break; + case 'E': diff_args.push_back( "-E" ); break; + case 'h': show_help(); return 0; + case 'i': diff_args.push_back( "-i" ); break; + case 'M': parse_format_list( arg ); break; + case 'N': break; + case 'O': parse_format_types2( arg, format_types ); break; + case 'p': diff_args.push_back( "-p" ); break; + case 'q': diff_args.push_back( "-q" ); break; + case 's': diff_args.push_back( "-s" ); break; + case 't': diff_args.push_back( "-t" ); break; + case 'T': diff_args.push_back( "-T" ); break; + case 'u': diff_args.push_back( "-u" ); break; + case 'U': diff_args.push_back( "-U" ); + diff_args.push_back( arg.c_str() ); break; + case 'V': show_version(); return 0; + case 'w': diff_args.push_back( "-w" ); break; + case 'W': diff_args.push_back( "-W" ); + diff_args.push_back( arg.c_str() ); break; + case 'y': diff_args.push_back( "-y" ); break; + case bz2_opt: parse_compressor( arg, fmt_bz2 ); break; + case gz_opt: parse_compressor( arg, fmt_gz ); break; + case lz_opt: parse_compressor( arg, fmt_lz ); break; + case xz_opt: parse_compressor( arg, fmt_xz ); break; + default : internal_error( "uncaught option." 
); + } + } // end process options + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + if( argind >= parser.arguments() ) + { show_error( "No files given.", 0, true ); return 2; } + if( argind + 2 < parser.arguments() ) + { show_error( "Too many files.", 0, true ); return 2; } + + const int files = parser.arguments() - argind; + std::string filenames[2]; // file names of the two input files + filenames[0] = parser.argument( argind ); + if( files == 2 ) filenames[1] = parser.argument( argind + 1 ); + + int infd[2]; // file descriptors of the two files + infd[0] = ( filenames[0] == "-" ) ? + STDIN_FILENO : open_instream( filenames[0] ); + if( infd[0] < 0 ) return 2; + + if( files == 2 ) + { + if( check_identical( filenames[0].c_str(), filenames[1].c_str() ) ) + return 0; + infd[1] = ( filenames[1] == "-" ) ? + STDIN_FILENO : open_instream( filenames[1] ); + if( infd[1] < 0 ) return 2; + } + else + { + if( filenames[0] == "-" ) + { show_error( "Missing operand after '-'.", 0, true ); return 2; } + if( format_types[0] >= 0 || format_types[1] >= 0 ) + { show_error( "Two files must be given when format is specified.", 0, true ); + return 2; } + filenames[1] = filenames[0]; + infd[1] = open_other_instream( filenames[1] ); + if( infd[1] < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't find file to compare with '%s'.\n", + program_name, filenames[0].c_str() ); + show_error( 0, 0, true ); return 2; + } + } + + std::atexit( remove_fifos ); + set_signals(); + if( !set_fifonames( filenames ) ) return 2; + + Children children[2]; + if( !set_data_feeder( filenames[0], fifonames[0], infd[0], children[0], + format_types[0] ) || + !set_data_feeder( filenames[1], fifonames[1], infd[1], children[1], + format_types[1] ) ) + return 2; + + const pid_t diff_pid = fork(); + if( diff_pid == 0 ) // child (diff) + { + const char ** const argv = new const char *[diff_args.size()+5]; + argv[0] = 
DIFF; + for( unsigned i = 0; i < diff_args.size(); ++i ) + argv[i+1] = diff_args[i]; + argv[diff_args.size()+1] = "--"; + argv[diff_args.size()+2] = fifonames[0].c_str(); + argv[diff_args.size()+3] = fifonames[1].c_str(); + argv[diff_args.size()+4] = 0; + execvp( argv[0], (char **)argv ); + show_exec_error( DIFF ); + _exit( 2 ); + } + if( diff_pid < 0 ) // parent + { show_fork_error( DIFF ); return 2; } + + int retval = wait_for_child( diff_pid, DIFF ); + + for( int i = 0; i < 2; ++i ) + if( !good_status( children[i], retval == 0 ) ) retval = 2; + + for( int i = 0; i < 2; ++i ) + if( filenames[i] != "-" && close( infd[i] ) != 0 ) + { + show_file_error( filenames[i].c_str(), "Error closing input file", errno ); + retval = 2; + } + + return retval; + } diff --git a/zegrep.in b/zegrep.in new file mode 100644 index 0000000..0cac12e --- /dev/null +++ b/zegrep.in @@ -0,0 +1,3 @@ +#! /bin/sh +bindir=`echo "$0" | sed -e 's,[^/]*$,,'` +exec "${bindir}"zgrep -E "$@" diff --git a/zfgrep.in b/zfgrep.in new file mode 100644 index 0000000..c1a96d8 --- /dev/null +++ b/zfgrep.in @@ -0,0 +1,3 @@ +#! /bin/sh +bindir=`echo "$0" | sed -e 's,[^/]*$,,'` +exec "${bindir}"zgrep -F "$@" diff --git a/zgrep.cc b/zgrep.cc new file mode 100644 index 0000000..1454e77 --- /dev/null +++ b/zgrep.cc @@ -0,0 +1,401 @@ +/* Zgrep - search compressed files for a regular expression + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined(__MSVCRT__) || defined(__OS2__) +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + + +namespace { + +#include "recursive.cc" +#include "zcatgrep.cc" + +void show_help() + { + std::printf( "zgrep is a front end to the program grep that allows transparent search\n" + "on any combination of compressed and uncompressed files. If any file\n" + "given is compressed, its decompressed content is used. If a file given\n" + "does not exist, and its name does not end with one of the known\n" + "extensions, zgrep tries the compressed file names corresponding to the\n" + "formats supported. If a file fails to decompress, zgrep continues\n" + "searching the rest of the files.\n" + "\nIf a file is specified as '-', data are read from standard input,\n" + "decompressed if needed, and fed to grep. 
Data read from standard input\n" + "must be of the same type; all uncompressed or all in the same\n" + "compressed format.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches read standard input.\n" + "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nUsage: zgrep [options] <pattern> [files]\n" + "\nExit status is 0 if match, 1 if no match, 2 if trouble.\n" + "Some options only work if the grep program used supports them.\n" + "\nOptions:\n" + " --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --text treat all files as text\n" + " -A, --after-context=<n> print <n> lines of trailing context\n" + " -b, --byte-offset print the byte offset of each line\n" + " -B, --before-context=<n> print <n> lines of leading context\n" + " -c, --count only print a count of matching lines per file\n" + " -C, --context=<n> print <n> lines of output context\n" + " --color[=<when>] show matched strings in color\n" + " -e, --regexp=<pattern> use <pattern> as the pattern to match\n" + " -E, --extended-regexp <pattern> is an extended regular expression\n" + " -f, --file=<file> obtain patterns from <file>\n" + " -F, --fixed-strings <pattern> is a set of newline-separated strings\n" + " -h, --no-filename suppress the prefixing filename on output\n" + " -H, --with-filename print the filename for each match\n" + " -i, --ignore-case ignore case distinctions\n" + " -I ignore binary files\n" + " -l, --files-with-matches only print names of files containing matches\n" + " -L, --files-without-match only print names of files containing no matches\n" + " -m, --max-count=<n> stop after <n> matches\n" + " -M, --format=<list> process only the formats in <list>\n" + " -n, --line-number print the line number of each line\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -o, --only-matching show only the part of a line matching <pattern>\n" + " -O, 
--force-format=<fmt> force the format given (bz2, gz, lz, xz)\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -s, --no-messages suppress error messages\n" + " -v, --invert-match select non-matching lines\n" + " --verbose verbose mode (show error messages)\n" + " -w, --word-regexp match only whole words\n" + " -x, --line-regexp match only whole lines\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" + "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); + show_help_addr(); + } + + +int zgrep_stdin( int infd, const int format_index, + const std::vector< const char * > & grep_args ) + { + Children children; + if( !set_data_feeder( "", &infd, children, format_index ) ) return 2; + const pid_t grep_pid = fork(); + if( grep_pid == 0 ) // child (grep) + { + if( dup2( infd, STDIN_FILENO ) >= 0 && close( infd ) == 0 ) + { + const char ** const argv = new const char *[grep_args.size()+2]; + argv[0] = GREP; + for( unsigned i = 0; i < grep_args.size(); ++i ) + argv[i+1] = grep_args[i]; + argv[grep_args.size()+1] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( GREP ); + _exit( 2 ); + } + if( grep_pid < 0 ) // parent + { show_fork_error( GREP ); return 2; } + + int retval = wait_for_child( grep_pid, GREP ); + + if( !good_status( children, retval == 1 ) ) retval = 2; + + if( close( infd ) != 0 ) + { show_close_error(); return 2; } + return retval; + } + + +int zgrep_file( int infd, const int format_index, + const std::string & input_filename, + const std::vector< const char * > & grep_args, + const int list_mode, const bool show_name 
) + { + Children children; + if( !set_data_feeder( input_filename, &infd, children, format_index ) ) + return 2; + int fda[2]; // pipe from grep + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return 2; } + const pid_t grep_pid = fork(); + if( grep_pid == 0 ) // child (grep) + { + if( dup2( infd, STDIN_FILENO ) >= 0 && + dup2( fda[1], STDOUT_FILENO ) >= 0 && + close( infd ) == 0 && close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const char ** const argv = new const char *[grep_args.size()+2]; + argv[0] = GREP; + for( unsigned i = 0; i < grep_args.size(); ++i ) + argv[i+1] = grep_args[i]; + argv[grep_args.size()+1] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( GREP ); + _exit( 2 ); + } + if( grep_pid < 0 ) // parent + { show_fork_error( GREP ); return 2; } + + close( fda[1] ); + enum { buffer_size = 256 }; + uint8_t buffer[buffer_size]; + bool line_begin = true; + while( true ) + { + const int size = readblock( fda[0], buffer, buffer_size ); + if( size != buffer_size && errno ) + { show_error( "Read error", errno ); return 2; } + if( size > 0 && !list_mode ) + { + if( show_name ) + for( int i = 0; i < size; ++i ) + { + if( line_begin ) + { line_begin = false; std::printf( "%s:", input_filename.c_str() ); } + if( buffer[i] == '\n' ) line_begin = true; + putchar( buffer[i] ); + } + else if( std::fwrite( buffer, 1, size, stdout ) != (unsigned)size ) + { std::fflush( stdout ); show_error( "Write error", errno ); return 2; } + std::fflush( stdout ); + } + if( size < buffer_size ) break; // end of grep's output + } + + int retval = wait_for_child( grep_pid, GREP ); + + if( !good_status( children, retval == 1 ) ) retval = 2; + + if( list_mode && (retval == 0) == (list_mode == 1) ) + { std::printf( "%s\n", input_filename.c_str() ); std::fflush( stdout ); } + if( close( infd ) != 0 ) + { show_close_error(); return 2; } + if( close( fda[0] ) != 0 ) + { show_close_error( GREP ); return 2; } + return retval; + } + +} // end namespace 
+ + +int main( const int argc, const char * const argv[] ) + { + enum { help_opt = 256, verbose_opt, color_opt, + bz2_opt, gz_opt, lz_opt, xz_opt }; + int format_index = -1; + int list_mode = 0; // 1 = list matches, -1 = list non-matches + int recursive = 0; // 1 = '-r', 2 = '-R' + int show_name = -1; // tri-state bool + bool no_messages = false; + std::list< std::string > filenames; + std::vector< const char * > grep_args; // args to grep, maybe empty + std::string color_option; // needed because of optional arg + program_name = "zgrep"; + invocation_name = ( argc > 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'a', "text", Arg_parser::no }, // grep GNU + { 'A', "after-context", Arg_parser::yes }, // grep GNU + { 'b', "byte-offset", Arg_parser::no }, // grep GNU + { 'B', "before-context", Arg_parser::yes }, // grep GNU + { 'c', "count", Arg_parser::no }, // grep + { 'C', "context", Arg_parser::yes }, // grep GNU + { 'e', "regexp", Arg_parser::yes }, // grep + { 'E', "extended-regexp", Arg_parser::no }, // grep + { 'f', "file ", Arg_parser::yes }, // grep + { 'F', "fixed-strings", Arg_parser::no }, // grep + { 'h', "no-filename", Arg_parser::no }, // grep GNU + { 'H', "with-filename", Arg_parser::no }, // grep GNU + { 'i', "ignore-case", Arg_parser::no }, // grep + { 'I', 0, Arg_parser::no }, // grep GNU + { 'l', "files-with-matches", Arg_parser::no }, // grep + { 'L', "files-without-match", Arg_parser::no }, // grep GNU + { 'm', "max-count", Arg_parser::yes }, // grep GNU + { 'M', "format", Arg_parser::yes }, + { 'n', "line-number", Arg_parser::no }, // grep + { 'N', "no-rcfile", Arg_parser::no }, + { 'o', "only-matching", Arg_parser::no }, // grep + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 's', "no-messages", Arg_parser::no }, // grep + { 'v', "invert-match", Arg_parser::no }, // grep + { 'V', 
"version", Arg_parser::no }, + { 'w', "word-regexp", Arg_parser::no }, // grep GNU + { 'x', "line-regexp", Arg_parser::no }, // grep + { help_opt, "help", Arg_parser::no }, + { verbose_opt, "verbose", Arg_parser::no }, + { color_opt, "color", Arg_parser::maybe }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + bool pattern_found = false; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'a': grep_args.push_back( "-a" ); break; + case 'A': grep_args.push_back( "-A" ); + grep_args.push_back( arg.c_str() ); break; + case 'b': grep_args.push_back( "-b" ); break; + case 'B': grep_args.push_back( "-B" ); + grep_args.push_back( arg.c_str() ); break; + case 'c': grep_args.push_back( "-c" ); break; + case 'C': grep_args.push_back( "-C" ); + grep_args.push_back( arg.c_str() ); break; + case 'e': grep_args.push_back( "-e" ); + grep_args.push_back( arg.c_str() ); pattern_found = true; break; + case 'E': grep_args.push_back( "-E" ); break; + case 'f': grep_args.push_back( "-f" ); + grep_args.push_back( arg.c_str() ); pattern_found = true; break; + case 'F': grep_args.push_back( "-F" ); break; + case 'h': show_name = false; break; + case 'H': show_name = true; break; + case 'i': grep_args.push_back( "-i" ); break; + case 'I': grep_args.push_back( "-I" ); break; + case 'l': grep_args.push_back( "-l" ); list_mode = 1; break; + case 'L': grep_args.push_back( "-L" ); list_mode = -1; break; + case 'm': grep_args.push_back( "-m" ); + grep_args.push_back( arg.c_str() ); 
break; + case 'M': parse_format_list( arg ); break; + case 'n': grep_args.push_back( "-n" ); break; + case 'N': break; + case 'o': grep_args.push_back( "-o" ); break; + case 'O': format_index = parse_format_type( arg ); break; + case 'q': grep_args.push_back( "-q" ); verbosity = -1; break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 's': grep_args.push_back( "-s" ); no_messages = true; break; + case 'v': grep_args.push_back( "-v" ); break; + case 'V': show_version(); return 0; + case 'w': grep_args.push_back( "-w" ); break; + case 'x': grep_args.push_back( "-x" ); break; + case help_opt : show_help(); return 0; + case verbose_opt: if( verbosity < 4 ) ++verbosity; + no_messages = false; break; + case color_opt: color_option = "--color"; + if( !arg.empty() ) { color_option += '='; color_option += arg; } + break; + case bz2_opt: parse_compressor( arg, fmt_bz2 ); break; + case gz_opt: parse_compressor( arg, fmt_gz ); break; + case lz_opt: parse_compressor( arg, fmt_lz ); break; + case xz_opt: parse_compressor( arg, fmt_xz ); break; + default : internal_error( "uncaught option." ); + } + } // end process options + + if( !color_option.empty() ) // push the last value set + grep_args.push_back( color_option.c_str() ); + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + if( !pattern_found ) + { + if( argind >= parser.arguments() ) + { show_error( "Pattern not found." ); return 2; } + const std::string & arg = parser.argument( argind++ ); + if( arg.size() && arg[0] == '-' ) grep_args.push_back( "-e" ); + grep_args.push_back( arg.c_str() ); + } + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() ) filenames.push_back( recursive ? "." 
: "-" ); + + if( show_name < 0 ) show_name = ( filenames.size() != 1 || recursive ); + + std::string input_filename; + int retval = 1; + bool error = false; + bool stdin_used = false; + while( next_filename( filenames, input_filename, error, recursive, + false, no_messages ) ) + { + int infd; + if( input_filename == "." ) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; input_filename = "-"; + } + else + { + infd = open_instream( input_filename, format_index < 0, no_messages ); + if( infd < 0 ) { error = true; continue; } + } + + int tmp; + if( infd == STDIN_FILENO ) + tmp = zgrep_stdin( infd, format_index, grep_args ); + else tmp = zgrep_file( infd, format_index, input_filename, grep_args, + list_mode, show_name ); + if( tmp == 0 || ( tmp == 2 && retval == 1 ) ) retval = tmp; + + if( close( infd ) != 0 ) + { show_file_error( input_filename.c_str(), "Error closing input file", + errno ); error = true; } + if( retval == 0 && verbosity < 0 ) break; + } + + if( std::fclose( stdout ) != 0 ) + { + show_error( "Error closing stdout", errno ); + error = true; + } + if( error && ( retval != 0 || verbosity >= 0 ) ) retval = 2; + return retval; + } diff --git a/ztest.cc b/ztest.cc new file mode 100644 index 0000000..812278e --- /dev/null +++ b/ztest.cc @@ -0,0 +1,335 @@ +/* Ztest - verify the integrity of compressed files + Copyright (C) 2010-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined(__MSVCRT__) || defined(__OS2__) +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +namespace { + +#include "recursive.cc" + +void show_help() + { + std::printf( "ztest verifies the integrity of the compressed files specified.\n" + "Uncompressed files are ignored. If a file is specified as '-', the\n" + "integrity of compressed data read from standard input is verified. Data\n" + "read from standard input must be all in the same compressed format. If\n" + "a file fails to decompress, does not exist, can't be opened, or is a\n" + "terminal, ztest continues verifying the rest of the files. A final\n" + "diagnostic is shown at verbosity level 1 or higher if any file fails the\n" + "test when testing multiple files.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches read standard input.\n" + "\nThe formats supported are bzip2, gzip, lzip, and xz.\n" + "\nNote that error detection in the xz format is broken. First, some xz\n" + "files lack integrity information. Second, not all xz decompressors can\n" + "verify the integrity of all xz files. Third, section 2.1.1.2 'Stream\n" + "Flags' of the xz format specification allows xz decompressors to produce\n" + "garbage output without issuing any warning. 
Therefore, xz files can't\n" + "always be verified as reliably as files in the other formats can.\n" + "\nUsage: ztest [options] [files]\n" + "\nExit status is 0 if all compressed files verify OK, 1 if environmental\n" + "problems (file not found, invalid flags, I/O errors, etc), 2 if any\n" + "compressed file is corrupt or invalid.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -M, --format=<list> process only the formats in <list>\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=<fmt> force the format given (bz2, gz, lz, xz)\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" ); + show_help_addr(); + } + + +int open_instream( const std::string & input_filename ) + { + const int infd = open( input_filename.c_str(), O_RDONLY | O_BINARY ); + if( infd < 0 ) + show_file_error( input_filename.c_str(), "Can't open input file", errno ); + return infd; + } + + +int ztest_stdin( const int infd, int format_index, + const std::vector< const char * > & ztest_args ) + { + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + if( format_index < 0 ) + format_index = test_format( infd, magic_data, &magic_size ); + const char * const compressor_name = get_compressor_name( format_index ); + if( !compressor_name ) + { show_error( "Unknown data format read from stdin." 
); return 2; } + int fda[2]; // pipe from feeder + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return 1; } + + const pid_t pid = fork(); + if( pid == 0 ) // child1 (compressor feeder) + { + if( close( fda[0] ) != 0 || + !feed_data( "", infd, fda[1], magic_data, magic_size ) ) + _exit( 1 ); + if( close( fda[1] ) != 0 ) + { show_close_error(); _exit( 1 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return 1; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child2 (compressor) + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const int size2 = ztest_args.size(); + const char ** const argv = new const char *[size+size2+3]; + argv[0] = compressor_name; + for( int i = 0; i < size; ++i ) + argv[i+1] = compressor_args[i].c_str(); + for( int i = 0; i < size2; ++i ) + argv[i+size+1] = ztest_args[i]; + argv[size+size2+1] = "-t"; + argv[size+size2+2] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( compressor_name ); + _exit( 1 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( compressor_name ); return 1; } + + close( fda[0] ); close( fda[1] ); + const bool isgzxz = ( format_index == fmt_gz || format_index == fmt_xz ); + int retval = wait_for_child( pid2, compressor_name, 1, isgzxz ); + if( retval == 0 && wait_for_child( pid, "data feeder" ) != 0 ) + retval = 1; + return retval; + } + + +int ztest_file( const int infd, int format_index, + const std::string & input_filename, + const std::vector< const char * > & ztest_args ) + { + static int disable_xz = -1; // tri-state bool + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + if( format_index < 0 ) + format_index = test_format( infd, magic_data, &magic_size ); + const char * const compressor_name = get_compressor_name( format_index ); + if( 
!compressor_name ) + return 0; // ignore this file + if( format_index == fmt_xz ) + { + if( disable_xz < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_xz = ( std::system( command.c_str() ) != 0 ); + } + if( disable_xz ) return 0; // ignore this file if no xz installed + } + + const pid_t pid = fork(); + + if( pid == 0 ) // child (compressor) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const int size2 = ztest_args.size(); + const char ** const argv = new const char *[size+size2+5]; + argv[0] = compressor_name; + for( int i = 0; i < size; ++i ) + argv[i+1] = compressor_args[i].c_str(); + for( int i = 0; i < size2; ++i ) + argv[i+size+1] = ztest_args[i]; + argv[size+size2+1] = "-t"; + argv[size+size2+2] = "--"; + argv[size+size2+3] = input_filename.c_str(); + argv[size+size2+4] = 0; + execvp( argv[0], (char **)argv ); + show_exec_error( compressor_name ); + _exit( 1 ); + } + if( pid < 0 ) // parent + { show_fork_error( compressor_name ); return 1; } + + const bool isgzxz = ( format_index == fmt_gz || format_index == fmt_xz ); + return wait_for_child( pid, compressor_name, 1, isgzxz ); + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + int format_index = -1; + int recursive = 0; // 1 = '-r', 2 = '-R' + std::list< std::string > filenames; + std::vector< const char * > ztest_args; // args to ztest, maybe empty + program_name = "ztest"; + invocation_name = ( argc > 0 ) ? 
argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'h', "help", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'h': show_help(); return 0; + case 'M': parse_format_list( arg ); break; + case 'N': break; + case 'O': format_index = parse_format_type( arg ); break; + case 'q': verbosity = -1; ztest_args.push_back( "-q" ); break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 'v': if( verbosity < 4 ) ++verbosity; + ztest_args.push_back( "-v" ); break; + case 'V': show_version(); return 0; + case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break; + default : internal_error( "uncaught option." 
); + } + } // end process options + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" ); + + std::string input_filename; + int files_tested = 0, failed_tests = 0; + int retval = 0; + bool error = false; + bool stdin_used = false; + while( next_filename( filenames, input_filename, error, recursive ) ) + { + int infd; + if( input_filename == "." ) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; input_filename = "-"; + } + else + { + infd = open_instream( input_filename ); + if( infd < 0 ) { error = true; continue; } + } + + if( isatty( infd ) ) // for example /dev/tty + { + show_file_error( input_filename == "-" ? "(stdin)" : input_filename.c_str(), + "I won't read compressed data from a terminal." ); + close( infd ); error = true; continue; + } + + int tmp; + if( infd == STDIN_FILENO ) + tmp = ztest_stdin( infd, format_index, ztest_args ); + else tmp = ztest_file( infd, format_index, input_filename, ztest_args ); + if( tmp > retval ) retval = tmp; + ++files_tested; if( tmp ) ++failed_tests; + + if( close( infd ) != 0 ) + { show_file_error( input_filename.c_str(), "Error closing input file", + errno ); error = true; } + } + + if( std::fclose( stdout ) != 0 ) // in case decompressor writes to stdout + { + show_error( "Error closing stdout", errno ); + error = true; + } + if( error && retval == 0 ) retval = 1; + if( failed_tests > 0 && verbosity >= 1 && files_tested > 1 ) + std::fprintf( stderr, "%s: warning: %d %s failed the test.\n", + program_name, failed_tests, + ( failed_tests == 1 ) ? 
"file" : "files" ); + return retval; + } diff --git a/zupdate.cc b/zupdate.cc new file mode 100644 index 0000000..a605f35 --- /dev/null +++ b/zupdate.cc @@ -0,0 +1,412 @@ +/* Zupdate - recompress bzip2, gzip, xz files to lzip format + Copyright (C) 2013-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <utime.h> +#include <sys/stat.h> +#include <sys/wait.h> +#if defined(__MSVCRT__) || defined(__OS2__) +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +namespace { + +#include "recursive.cc" + +void show_help() + { + std::printf( "zupdate recompresses files from bzip2, gzip, and xz formats to lzip\n" + "format. Each original is compared with the new file and then deleted.\n" + "Only regular files with standard file name extensions are recompressed,\n" + "other files are ignored. Compressed files are decompressed and then\n" + "recompressed on the fly; no temporary files are created. 
The lzip format\n" + "is chosen as destination because it is the most appropriate for\n" + "long-term data archiving.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches do nothing.\n" + "\nIf the lzip compressed version of a file already exists, the file is\n" + "skipped unless the option '--force' is given. In this case, if the\n" + "comparison with the existing lzip version fails, an error is returned\n" + "and the original file is not deleted. The operation of zupdate is meant\n" + "to be safe and not cause any data loss. Therefore, existing lzip\n" + "compressed files are never overwritten nor deleted.\n" + "\nThe names of the original files must have one of the following extensions:\n" + "'.bz2', '.gz', or '.xz', which are recompressed to '.lz';\n" + "'.tbz', '.tbz2', '.tgz', or '.txz', which are recompressed to '.tlz'.\n" + "\nUsage: zupdate [options] [files]\n" + "\nExit status is 0 if all the compressed files were successfully recompressed\n" + "(if needed), compared, and deleted (if requested). Non-zero otherwise.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -f, --force don't skip a file even if the .lz exists\n" + " -k, --keep keep (don't delete) input files\n" + " -l, --lzip-verbose pass one option -v to the lzip compressor\n" + " -M, --format=<list> process only the formats in <list>\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " -0 .. 
-9 set compression level [default 9]\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" ); + show_help_addr(); + } + + +int cant_execute( const std::string & command, const int status ) + { + if( verbosity >= 0 ) + { + if( WIFEXITED( status ) ) + std::fprintf( stderr, "%s: Error executing '%s'. Exit status = %d\n", + program_name, command.c_str(), WEXITSTATUS( status ) ); + else + std::fprintf( stderr, "%s: Can't execute '%s'\n", + program_name, command.c_str() ); + } + return 1; + } + + +// Set permissions, owner, and times. +void set_permissions( const char * const rname, const struct stat & in_stats ) + { + bool warning = false; + const mode_t mode = in_stats.st_mode; + // chown will in many cases return with EPERM, which can be safely ignored. + if( chown( rname, in_stats.st_uid, in_stats.st_gid ) == 0 ) + { if( chmod( rname, mode ) != 0 ) warning = true; } + else + if( errno != EPERM || + chmod( rname, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) + warning = true; + struct utimbuf t; + t.actime = in_stats.st_atime; + t.modtime = in_stats.st_mtime; + if( utime( rname, &t ) != 0 ) warning = true; + if( warning && verbosity >= 2 ) + show_error( "Can't change output file attributes." ); + } + + + // Returns 0 for success, -1 for file skipped, 1 for error. 
+int zupdate_file( const std::string & name, const char * const lzip_name, + const std::vector< std::string > & lzip_args2, + const bool force, const bool keep_input_files, + const bool no_rcfile ) + { + static int disable_xz = -1; // tri-state bool + int format_index = -1; + std::string rname; // recompressed name + + const int eindex = extension_index( name ); // search extension + if( eindex >= 0 ) + { + format_index = extension_format( eindex ); + if( format_index == fmt_lz ) + { + if( verbosity >= 2 ) + std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n", + program_name, name.c_str(), extension_from( eindex ) ); + return 0; // ignore this file + } + rname.assign( name, 0, name.size() - std::strlen( extension_from( eindex ) ) ); + rname += ( std::strcmp( extension_to( eindex ), ".tar" ) == 0 ) ? + ".tlz" : ".lz"; // keep combined extension + } + const char * const compressor_name = get_compressor_name( format_index ); + if( !compressor_name ) + { + if( verbosity >= 2 ) + std::fprintf( stderr, "%s: Unknown extension in file name '%s' -- ignored.\n", + program_name, name.c_str() ); + return 0; // ignore this file + } + + struct stat in_stats; + if( stat( name.c_str(), &in_stats ) != 0 ) // check input file + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't stat input file '%s': %s\n", + program_name, name.c_str(), std::strerror( errno ) ); + return 1; + } + if( !S_ISREG( in_stats.st_mode ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Input file '%s' is not a regular file.\n", + program_name, name.c_str() ); + return 1; + } + + struct stat st; // not used + const std::string rname2( rname + ".lz" ); // produced by lzip < 1.20 + const bool lz_exists = ( stat( rname.c_str(), &st ) == 0 ); + // don't modify an existing 'rname.lz' + const bool lz_lz_exists = ( stat( rname2.c_str(), &st ) == 0 ); + if( lz_exists && !force ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n", + 
program_name, rname.c_str() ); + return -1; + } + + if( format_index == fmt_xz ) + { + if( disable_xz < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_xz = ( std::system( command.c_str() ) != 0 ); + } + if( disable_xz ) return 0; // ignore this file if no xz installed + } + + if( !lz_exists ) // recompress + { + if( verbosity >= 1 ) + std::fprintf( stderr, "Recompressing file '%s'\n", name.c_str() ); + int fda[2]; // pipe between decompressor and compressor + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return 1; } + + const pid_t pid = fork(); + if( pid == 0 ) // child1 (decompressor) + { + if( dup2( fda[1], STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+5]; + argv[0] = compressor_name; + for( int i = 0; i < size; ++i ) argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = "-cd"; + argv[size+2] = "--"; + argv[size+3] = name.c_str(); + argv[size+4] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( compressor_name ); + _exit( 1 ); + } + if( pid < 0 ) // parent + { show_fork_error( compressor_name ); return 1; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child2 (lzip compressor) + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & lzip_args = + get_compressor_args( fmt_lz ); + const int size = lzip_args.size(); + const int size2 = lzip_args2.size(); + const char ** const argv = new const char *[size+size2+5]; + argv[0] = lzip_name; + argv[1] = "-9"; + for( int i = 0; i < size; ++i ) argv[i+2] = lzip_args[i].c_str(); + for( int i = 0; i < size2; ++i ) argv[i+size+2] = lzip_args2[i].c_str(); + argv[size+size2+2] = "-o"; + argv[size+size2+3] = rname.c_str(); + argv[size+size2+4] 
= 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( lzip_name ); + _exit( 1 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( lzip_name ); return 1; } + + close( fda[0] ); close( fda[1] ); + int retval = wait_for_child( pid, compressor_name ); + int retval2 = wait_for_child( pid2, lzip_name ); + if( retval || retval2 ) + { if( !lz_lz_exists ) std::remove( rname2.c_str() ); // lzip < 1.20 + std::remove( rname.c_str() ); return 1; } + if( stat( rname.c_str(), &st ) != 0 && + ( lz_lz_exists || stat( rname2.c_str(), &st ) != 0 || + std::rename( rname2.c_str(), rname.c_str() ) != 0 ) ) + { show_file_error( rname.c_str(), "Error renaming output file", errno ); + return 1; } // lzip < 1.11 + set_permissions( rname.c_str(), in_stats ); + } + + { + if( lz_exists && verbosity >= 1 ) + std::fprintf( stderr, "Comparing file '%s'\n", name.c_str() ); + std::string zcmp_command( invocation_name ); + unsigned i = zcmp_command.size(); + while( i > 0 && zcmp_command[i-1] != '/' ) --i; + zcmp_command.resize( i ); zcmp_command.insert( zcmp_command.begin(), '\'' ); + zcmp_command += "zcmp' "; // '[dir/]zcmp' + if( no_rcfile ) zcmp_command += "-N "; + if( verbosity < 0 ) zcmp_command += "-q "; + zcmp_command += '\''; zcmp_command += name; + zcmp_command += "' '"; zcmp_command += rname; zcmp_command += '\''; + int status = std::system( zcmp_command.c_str() ); + if( status != 0 ) + { if( !lz_exists ) std::remove( rname.c_str() ); + return cant_execute( zcmp_command, status ); } + } + + if( !keep_input_files && std::remove( name.c_str() ) != 0 && errno != ENOENT ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't delete input file '%s': %s\n", + program_name, name.c_str(), std::strerror( errno ) ); + return 1; + } + return 0; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt }; + int recursive = 0; // 1 = '-r', 2 = '-R' + std::list< std::string > filenames; + std::vector< 
std::string > lzip_args2; // args to lzip, maybe empty + bool force = false; + bool keep_input_files = false; + bool no_rcfile = false; + program_name = "zupdate"; + invocation_name = ( argc > 0 ) ? argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { '0', 0, Arg_parser::no }, + { '1', 0, Arg_parser::no }, + { '2', 0, Arg_parser::no }, + { '3', 0, Arg_parser::no }, + { '4', 0, Arg_parser::no }, + { '5', 0, Arg_parser::no }, + { '6', 0, Arg_parser::no }, + { '7', 0, Arg_parser::no }, + { '8', 0, Arg_parser::no }, + { '9', 0, Arg_parser::no }, + { 'f', "force", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'k', "keep", Arg_parser::no }, + { 'l', "lzip-verbose", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { 0 , 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + lzip_args2.push_back( "-" ); lzip_args2.back() += code; break; + case 'f': force = true; break; + case 'h': show_help(); return 0; + case 'k': keep_input_files = true; break; + case 'l': lzip_args2.push_back( "-v" ); break; + case 'M': parse_format_list( arg ); break; + case 'N': 
no_rcfile = true; break; + case 'q': verbosity = -1; lzip_args2.push_back( "-q" ); break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break; + default : internal_error( "uncaught option." ); + } + } // end process options + +#if defined(__MSVCRT__) || defined(__OS2__) + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + const char * const lzip_name = get_compressor_name( fmt_lz ); + if( !lzip_name ) + { show_error( "Missing name of compressor for lzip format." ); return 1; } + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() && recursive ) filenames.push_back( "." ); + + std::string input_filename; + int retval = 0; + bool error = false; + while( next_filename( filenames, input_filename, error, recursive, true ) ) + { + int tmp = zupdate_file( input_filename, lzip_name, lzip_args2, force, + keep_input_files, no_rcfile ); + if( tmp < 0 ) error = true; + if( tmp > retval ) retval = tmp; + if( tmp > 0 ) break; + } + if( error && retval == 0 ) retval = 1; + return retval; + } diff --git a/zutils.cc b/zutils.cc new file mode 100644 index 0000000..54090ff --- /dev/null +++ b/zutils.cc @@ -0,0 +1,283 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <stdint.h> +#include <unistd.h> +#include <sys/wait.h> + +#include "rc.h" +#include "zutils.h" + + +namespace { + +inline bool isvalid_ds( const uint8_t ds ) // lzip valid dictionary_size + { + enum { min_dictionary_size = 1 << 12, + max_dictionary_size = 1 << 29 }; + unsigned dictionary_size = ( 1 << ( ds & 0x1F ) ); + if( dictionary_size > min_dictionary_size ) + dictionary_size -= ( dictionary_size / 16 ) * ( ( ds >> 5 ) & 7 ); + return ( dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ); + } + + +/* Returns -1 if child not terminated, 2 in case of error, or exit status of + child process 'pid'. +*/ +int child_status( const pid_t pid, const char * const name ) + { + int status; + while( true ) + { + const int tmp = waitpid( pid, &status, WNOHANG ); + if( tmp == -1 && errno != EINTR ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Error checking status of '%s': %s\n", + program_name, name, std::strerror( errno ) ); + _exit( 2 ); + } + if( tmp == 0 ) return -1; // child not terminated + if( tmp == pid ) break; // child terminated + } + if( WIFEXITED( status ) ) return WEXITSTATUS( status ); + if( WIFSIGNALED( status ) && WTERMSIG( status ) == SIGPIPE ) return 0; + return 2; + } + +} // end namespace + + +/* Returns the number of bytes really read. + If (returned value < size) and (errno == 0), means EOF was reached. 
+*/ +int readblock( const int fd, uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = read( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n == 0 ) break; // EOF + else if( errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +/* Returns the number of bytes really written. + If (returned value < size), it is always an error. +*/ +int writeblock( const int fd, const uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = write( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n < 0 && errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +// Empty filename means stdin. +// +bool feed_data( const std::string & filename, const int infd, const int outfd, + const uint8_t * magic_data, const int magic_size ) + { + if( magic_size && writeblock( outfd, magic_data, magic_size ) != magic_size ) + { show_error( "Write error", errno ); return false; } + enum { buffer_size = 4096 }; + uint8_t buffer[buffer_size]; + while( true ) + { + const int size = readblock( infd, buffer, buffer_size ); + if( size != buffer_size && errno ) + { const char * const name = filename.empty() ? "-" : filename.c_str(); + show_file_error( name, "Read error", errno ); return false; } + if( size > 0 && writeblock( outfd, buffer, size ) != size ) + { show_error( "Write error", errno ); return false; } + if( size < buffer_size ) break; + } + return true; + } + + +bool good_status( const Children & children, const bool finished ) + { + bool error = false; + for( int i = 0; i < 2; ++i ) + { + const pid_t pid = children.pid[i]; + if( pid ) + { + const char * const name = + ( i == 0 ) ? 
"data feeder" : children.compressor_name; + // even if compressor finished, trailing data may remain in data feeder + if( i == 0 || !finished ) + { + const int tmp = child_status( pid, name ); + if( tmp < 0 ) // child not terminated + { kill( pid, SIGTERM ); wait_for_child( pid, name ); } + else if( tmp != 0 ) error = true; // child status != 0 + } + else + if( wait_for_child( pid, name ) != 0 ) error = true; + } + } + return !error; + } + + +bool set_data_feeder( const std::string & filename, int * const infdp, + Children & children, int format_index ) + { + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + if( format_index < 0 ) + format_index = test_format( *infdp, magic_data, &magic_size ); + children.compressor_name = get_compressor_name( format_index ); + + if( children.compressor_name ) // compressed + { + int fda[2]; // pipe from feeder + int fda2[2]; // pipe from compressor + if( pipe( fda ) < 0 || pipe( fda2 ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const int old_infd = *infdp; + *infdp = fda2[0]; + const pid_t pid = fork(); + if( pid == 0 ) // child 1 (compressor feeder) + { + if( close( fda[0] ) != 0 || + close( fda2[0] ) != 0 || close( fda2[1] ) != 0 || + !feed_data( filename, old_infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) + { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child 2 (compressor) + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + dup2( fda2[1], STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 && + close( fda2[0] ) == 0 && close( fda2[1] ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+3]; + argv[0] = children.compressor_name; + for( int i = 0; i < size; 
++i ) + argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = ( verbosity >= 0 ) ? "-d" : "-dq"; + argv[size+2] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( children.compressor_name ); + _exit( 2 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( children.compressor_name ); return false; } + + close( fda[0] ); close( fda[1] ); close( fda2[1] ); + children.pid[0] = pid; + children.pid[1] = pid2; + } + else // uncompressed + { + int fda[2]; // pipe from feeder + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const int old_infd = *infdp; + *infdp = fda[0]; + const pid_t pid = fork(); + if( pid == 0 ) // child (feeder) + { + if( close( fda[0] ) != 0 || + !feed_data( filename, old_infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) + { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + close( fda[1] ); + children.pid[0] = pid; + children.pid[1] = 0; + } + return true; + } + + +// Returns format index or -1 if uncompressed +// +int test_format( const int infd, uint8_t magic_data[], + int * const magic_sizep ) + { + enum { bzip2_magic_size = 3, + gzip_magic_size = 2, + lzip_magic_size = 5, + xz_magic_size = 5 }; + const uint8_t bzip2_magic[bzip2_magic_size] = + { 0x42, 0x5A, 0x68 }; // "BZh" + const uint8_t gzip_magic[gzip_magic_size] = + { 0x1F, 0x8B }; + const uint8_t lzip_magic[lzip_magic_size] = + { 0x4C, 0x5A, 0x49, 0x50, 0x01 }; // "LZIP\001" + const uint8_t xz_magic[xz_magic_size] = + { 0xFD, 0x37, 0x7A, 0x58, 0x5A }; // 0xFD, "7zXZ" + + *magic_sizep = readblock( infd, magic_data, magic_buf_size ); + if( *magic_sizep == magic_buf_size ) + { + if( std::memcmp( magic_data, bzip2_magic, bzip2_magic_size ) == 0 && + magic_data[3] >= '1' && magic_data[3] <= '9' && + std::memcmp( magic_data + 4, "1AY&SY", 6 ) == 0 ) + return fmt_bz2; + if( std::memcmp( magic_data, gzip_magic, gzip_magic_size ) 
== 0 ) + return fmt_gz; + if( std::memcmp( magic_data, lzip_magic, lzip_magic_size ) == 0 && + isvalid_ds( magic_data[lzip_magic_size] ) ) + return fmt_lz; + if( std::memcmp( magic_data, xz_magic, xz_magic_size ) == 0 ) + return fmt_xz; + } + return -1; + } diff --git a/zutils.h b/zutils.h new file mode 100644 index 0000000..064af51 --- /dev/null +++ b/zutils.h @@ -0,0 +1,37 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2021 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/

// Reads up to 'size' bytes from 'fd' into 'buf'.
// Returns the number of bytes really read.
int readblock( const int fd, uint8_t * const buf, const int size );
// Writes 'size' bytes from 'buf' to 'fd'.
// Returns the number of bytes really written.
int writeblock( const int fd, const uint8_t * const buf, const int size );
// Writes 'magic_data' to 'outfd', then copies 'infd' to 'outfd'.
// Empty filename means stdin.
bool feed_data( const std::string & filename, const int infd, const int outfd,
                const uint8_t * magic_data, const int magic_size );

// Child processes created by set_data_feeder.
struct Children
  {
  const char * compressor_name;         // null if data is uncompressed
  pid_t pid[2];                         // data feeder, compressor
  };
// Returns true if none of the children examined ended with an error.
bool good_status( const Children & children, const bool finished );
// Creates the children that feed and decompress the data from '*infdp'.
bool set_data_feeder( const std::string & filename, int * const infdp,
                      Children & children, int format_index );

enum { magic_buf_size = 10 };   // >= longest extended magic (bzip2)

// Returns format index or -1 if uncompressed
//
int test_format( const int infd, uint8_t magic_data[],
                 int * const magic_sizep );
diff --git a/zutilsrc b/zutilsrc
new file mode 100644
index 0000000..04a1d69
--- /dev/null
+++ b/zutilsrc
@@ -0,0 +1,16 @@
#
# Runtime Configuration file for Zutils
#
# Zutils looks for this file in:
# 1 - $HOME/.zutilsrc
# 2 - ${sysconfdir}/zutilsrc

# This file sets the compressor and options to be used for each format.
# The command line options override compressors specified in this file.
# Syntax: <format> = <compressor> [options]
# Uncomment each line you want to take effect.

# bz2 = lbzip2 -n2
# gz = pigz -p2
# lz = plzip -n2
# xz = pixz -p2 |