diff options
Diffstat (limited to '')
-rw-r--r-- | AUTHORS | 1 | ||||
-rw-r--r-- | COPYING | 338 | ||||
-rw-r--r-- | ChangeLog | 237 | ||||
-rw-r--r-- | INSTALL | 83 | ||||
-rw-r--r-- | Makefile.in | 241 | ||||
-rw-r--r-- | NEWS | 18 | ||||
-rw-r--r-- | README | 49 | ||||
-rw-r--r-- | arg_parser.cc | 197 | ||||
-rw-r--r-- | arg_parser.h | 110 | ||||
-rwxr-xr-x | configure | 212 | ||||
-rw-r--r-- | doc/zcat.1 | 121 | ||||
-rw-r--r-- | doc/zcmp.1 | 108 | ||||
-rw-r--r-- | doc/zdiff.1 | 139 | ||||
-rw-r--r-- | doc/zgrep.1 | 194 | ||||
-rw-r--r-- | doc/ztest.1 | 102 | ||||
-rw-r--r-- | doc/zupdate.1 | 122 | ||||
-rw-r--r-- | doc/zutils.info | 988 | ||||
-rw-r--r-- | doc/zutils.texi | 1034 | ||||
-rw-r--r-- | rc.cc | 457 | ||||
-rw-r--r-- | rc.h | 74 | ||||
-rw-r--r-- | recursive.cc | 109 | ||||
-rwxr-xr-x | testsuite/check.sh | 685 | ||||
-rw-r--r-- | testsuite/test.txt | 676 | ||||
-rw-r--r-- | testsuite/test.txt.tar | bin | 0 -> 40960 bytes | |||
-rw-r--r-- | testsuite/test_bad_crc.lz | bin | 0 -> 7376 bytes | |||
-rw-r--r-- | testsuite/zcat_vs.dat | 68 | ||||
-rw-r--r-- | testsuite/zero_bad_crc.gz | bin | 0 -> 20 bytes | |||
-rw-r--r-- | testsuite/zero_bad_crc.lz | bin | 0 -> 36 bytes | |||
-rw-r--r-- | zcat.cc | 392 | ||||
-rw-r--r-- | zcatgrep.cc | 59 | ||||
-rw-r--r-- | zcmp.cc | 534 | ||||
-rw-r--r-- | zcmpdiff.cc | 78 | ||||
-rw-r--r-- | zdiff.cc | 446 | ||||
-rw-r--r-- | zegrep.in | 3 | ||||
-rw-r--r-- | zfgrep.in | 3 | ||||
-rw-r--r-- | zgrep.cc | 417 | ||||
-rw-r--r-- | ztest.cc | 369 | ||||
-rw-r--r-- | zupdate.cc | 506 | ||||
-rw-r--r-- | zutils.cc | 292 | ||||
-rw-r--r-- | zutils.conf | 18 | ||||
-rw-r--r-- | zutils.h | 40 |
41 files changed, 9520 insertions, 0 deletions
@@ -0,0 +1 @@ +The zutils were written by Antonio Diaz Diaz. @@ -0,0 +1,338 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) <year> <name of author> + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. 
If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..459ca03 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,237 @@ +2024-01-23 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.13 released. + * zutils.cc (test_format): Fix detection of bzip2 with no blocks. + * rc.h (format_order): Put fmt_gz before fmt_bz2. + * zcmpdiff.cc (open_other_instream): Try also other compressed formats. + * zcmp.cc (cmp): Report EOF on empty file like GNU cmp. + * zupdate.cc: Reformat file diagnostics as 'PROGRAM: FILE: MESSAGE'. + * Replace 'verify' with 'check'. + * configure, Makefile.in: New variable 'MAKEINFO'. + +2023-01-07 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.12 released. + * zutilsrc: Rename to zutils.conf. Search for it in $XDG_CONFIG_HOME. + (Suggested by Adam Tuja). + * Allow '-O, --force-format' force also uncompressed format. + * zcmp.cc: New option '-H, --hexadecimal'. + Change long name of option '-s' to '--script'. + (Following a similar change made to GNU ed). + Assign short name '-q' to options '--quiet' and '--silent'. + Separate option '-l, --list' from '-v, --verbose'. + (cmp): Print byte and line in EOF message like GNU cmp. + * zgrep.cc: New options '-G, --basic-regexp', '--label=<label>', + '--line-buffered', '-P, --perl-regexp', '--silent', + '-T, --initial-tab', '-U, --binary'. (Reported by Chris Jamboretz). + New option '-Z, --null'. (Reported by Leah Neukirchen). + * ztest.cc: Exit with status 2 if a file has wrong extension. + * zupdate.cc: New options '-d, --destdir', '-e, --expand-extensions', + '-i, --ignore-errors'. ('-i' suggested by Antoni Sawicki). + (zupdate_file): Pass '-q -s' to zcmp if verbosity < 0. + * Support compress'd (.Z) files through gzip in all utilities. 
+ * rc.cc (show_version): Print the versions of the compressors used. + (show_option_error): New function showing argument and option name. + * zutils.texi: Document that format is detected by its magic bytes. + * check.sh: Test tarlz (if available) as compressor for zupdate. + +2022-01-25 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.11 released. + * zcmp.cc, zdiff.cc (main): Fix race returning 1 instead of 2 when a + compressor is not found or when the wrong format is forced. + * zcmp.cc (getnum): Show option name and valid range if error. + * All utilities: Show option name if error in option argument. + * Add support for zstd format to all utilities. + * 'zdiff -v -V' now prints the version of the diff program used. + * 'zgrep --verbose -V' now prints the version of the grep program used. + * zutils.texi: Document recompression of read-only files by linking. + * zutils.texi: Change GNU Texinfo category to 'Compression'. + (Reported by Alfred M. Szmidt). + +2021-01-05 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.10 released. + * zdiff.cc (set_fifonames): Encode pid in little endian order. + * zupdate.cc (zupdate_file): Fix a portability issue with Solaris 10. + * zutils.texi: Document that 'zgrep -L' fails with GNU grep 3.2 to 3.4. + * check.sh: Test empty input files with all utilities except zupdate. + +2020-06-27 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.9 released. + * zcmp.cc, zdiff.cc: Read standard input only if requested. + * zdiff.cc (main): Pass options '-W' and '-y' to diff. + * zutils.cc (test_format): Detect bzip2 and lzip files better. + * ztest.cc (main): Continue testing if any input file is a terminal. + If verbosity >= 1, print number of files that failed the test. + * zcat.cc, zgrep.cc, ztest.cc (main): Check return value of close(infd). + * zutils.cc (good_status): Ignore trailing data remaining in feeder. + * zupdate.cc (zupdate_file): Support new and old lzip option '-o'. 
+ Keep combined extensions: tgz, tbz, tbz2, txz --> tlz. + Quote file names in zcmp_command to allow file names with spaces. + * *.cc (main): Set a valid invocation_name even if argc == 0. + * zutils.texi: Improve descriptions of zcat, zcmp, and zdiff. + +2019-01-01 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.8 released. + * zcat.cc: Fix a buffer overflow on outbuf when '-v' is used. + * zcat.cc (cat): A canary byte has been added to outbuf. + * New option '-R, --dereference-recursive'. + * Option '-r, --recursive' now skips symlinks. + * If no files and recursive, examine current working directory. + * recursive.cc (test_full_name): Detect directory loops. + * recursive.cc: Ignore directories if not --recursive. + * recursive.cc: Remove extra trailing slashes from directory args. + * zcatgrep.cc (open_instream): Show correct errno. + * zutils.cc (good_status): Wait for killed child. + * Test and document continuation or exit of zcat, zgrep, ztest, + and zupdate in case of error. + * configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'. + +2018-02-13 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.7 released. + * zgrep.cc (main): Pass option '--color' to grep. + * check.sh: Add new tests for zgrep. + +2017-04-05 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.6 released. + * zcmp.cc: Accept 'B' suffix in '--ignore-initial=1kB:1234B'. + * zutils.cc (feed_data): Show input file name in error messages. + +2016-05-15 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.5 released. + * zupdate.cc (zupdate_file): Pass '-q' to zcmp if verbosity < 0. + * zcat.cc, zgrep.cc, ztest.cc (main): Don't use stdin more than once. + * zdiff.cc (set_fifonames): Use '_' if both names are different. + * configure: Avoid warning on some shells when testing for g++. + * Makefile.in: Detect the existence of install-info. + * check.sh: A POSIX shell is required to run the tests. + +2015-05-29 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.4 released. 
+ * Option '--format' has been renamed to '-O, --force-format'. + * Add new option '-M, --format=<list>' to all utilities. + * zgrep.cc (main): Pass '-e' to grep if pattern begins with '-'. + * Makefile.in: New targets 'install*-compress'. + +2014-08-30 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.3 released. + * check.sh: Fix two values of expected exit status. + * zutils.texi: Document that '--format' does not check format. + * Add two missing #includes. + * Change license to GPL version 2 or later. + +2014-02-01 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.2 released. + * New utility; zupdate. + * Remove zutils executable. Utilities are now independent executables. + * zgrep.cc: Fix the exit status returned on error. + * zutils.texinfo: Rename to zutils.texi. + +2013-08-02 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.1 released. + * Add options '--bz2', '--gz', '--lz', and '--xz' to all utilities. + * Add runtime configuration file 'zutilsrc'. + * New function 'good_status' checks exit status of all children. + * Fix all uses of decompressed/uncompressed in the documentation. + +2013-05-31 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 1.0 released. + * Add new option '--format' to all utilities. + * main.cc (main): Make 'grep_show_name' tri-state so that file name + is not prefixed to the output by default when searching one file + and '--recursive' has not been selected. + * zgrep.cc: Fix output of option '-L' (it behaved like '-l'). + * zcmp.cc: Fix deadlock when option '-n' is used. + * zdiff.cc (set_data_feeder): Call compressor with option '-q' only + if verbosity < 0. + * zutils.cc (set_data_feeder): Likewise. + * Change quote characters in messages as advised by GNU Standards. + * configure: Options now accept a separate argument. + Rename 'datadir' to 'datarootdir'. Ignore environment variables. + * Makefile.in: New target 'install-bin'. + * Use 'setmode' instead of '_setmode' on Windows and OS/2. 
+ * zcat.cc (Line_number): Fix a portability issue with Solaris 9. + * INSTALL: Document installing zutils along with GNU gzip. + +2011-01-11 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.9 released. + * configure: New variables 'DIFF' and 'GREP'. + * zcmp.cc: Fix deadlock when files differ. + * zgrep.cc: Fix deadlock when binary file matches. + +2010-11-15 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.8 released. + * main.cc: New options '--zcat', '--zgrep', and '--ztest'. + * zcat.cc: New file implementing zcat+cat functionality in C++. + * zcmp.cc: New file implementing zcmp+cmp functionality in C++. + * doc/zcmp.1: New file. + * Remove files zcmp.in, zdiff.in. + * zdiff.cc: New file implementing zdiff functionality in C++. + * zgrep.cc: New file implementing zgrep functionality in C++. + * All mentions of zegrep and zfgrep have been removed from the + documentation because egrep and fgrep are deprecated. + * ztest.cc: New file implementing ztest functionality in C++. + * Makefile.in: Add quotes to directory names. + * check.sh: Use 'test.txt' instead of 'COPYING' for testing. + * configure: Remove environment safeguards as requested by + Richard Stallman. Now environment variables affect configure. + +2009-10-21 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.7 released. + * New utility: ztest. + * zcat.in: New option '-r, --recursive'. + +2009-10-05 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.6 released. + * zcat.in, zgrep.in: Remove the default compressor again. The format of + the data read from stdin is now automatically detected. + * Makefile.in: Add option '--name' to help2man invocation. + +2009-10-01 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.5 released. + * zcat.in, zgrep.in: Read data from stdin again. + * Add the default compressor again, for stdin only. + +2009-09-17 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.4 released. + * Add two new utilities: zegrep and zfgrep. 
+ * Add zutils executable which recognizes file formats. + +2009-08-28 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.3 released. + * Remove default compressor. + * zcat.in, zgrep.in: Don't read data from stdin. + * Update home page and mailing list addresses. + +2009-08-13 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.2 released. + * Add support for xz. + +2009-08-07 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.1 released. + + +Copyright (C) 2009-2024 Antonio Diaz Diaz. + +This file is a collection of facts, and thus it is not copyrightable, but just +in case, you have unlimited permission to copy, distribute, and modify it. @@ -0,0 +1,83 @@ +Requirements +------------ +You will need a C++98 compiler with support for 'long long'. +(gcc 3.3.6 or newer is recommended). +I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards +compliant compiler. +Gcc is available at http://gcc.gnu.org. + +POSIX compliant versions of diff and grep are required for zdiff and zgrep. + +(Option -L of zgrep fails (prints wrong results, returns wrong status, and +even hangs) when using GNU grep versions 3.2 to 3.4 inclusive because of a +wrong change in the exit status of grep, which was reverted in GNU grep 3.5). + +Compressors for bzip2, gzip and lzip formats are required to run the tests. + +If you are installing zutils along with GNU gzip and want to keep the +gzip scripts, the recommended method is to configure gzip as follows: + + ./configure --program-transform-name='s/^z/gz/' + +This renames, at installation time, the gzip scripts and man pages to +'gzcat', 'gzcat.1', etc., avoiding name clashes with the programs and +man pages from zutils. + + +Procedure +--------- +1. Unpack the archive if you have not done so already: + + tar -xf zutils[version].tar.lz +or + lzip -cd zutils[version].tar.lz | tar -xf - + +This creates the directory ./zutils[version] containing the source code +extracted from the archive. + +2. 
Change to the zutils directory and run configure. + (Try 'configure --help' for usage instructions). + + cd zutils[version] + ./configure + +3. Run make. + + make + +4. Optionally, type 'make check' to run the tests that come with zutils. + +5. Type 'make install' to install the programs and any data files and + documentation. You need root privileges to install into a prefix owned + by root. + + Or type 'make install-compress', which additionally compresses the + info manual and the man pages after installation. + (Installing compressed docs may become the default in the future). + + You can install only the programs, the info manual, or the man pages by + typing 'make install-bin', 'make install-info', or 'make install-man' + respectively. + + +Another way +----------- +You can also compile zutils into a separate directory. +To do this, you must use a version of 'make' that supports the variable +'VPATH', such as GNU 'make'. 'cd' to the directory where you want the +object files and executables to go and run the 'configure' script. +'configure' automatically checks for the source code in '.', in '..', and +in the directory that 'configure' is in. + +'configure' recognizes the option '--srcdir=DIR' to control where to look +for the source code. Usually 'configure' can determine that directory +automatically. + +After running 'configure', you can run 'make' and 'make install' as +explained above. + + +Copyright (C) 2009-2024 Antonio Diaz Diaz. + +This file is free documentation: you have unlimited permission to copy, +distribute, and modify it. 
diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..b8984fd --- /dev/null +++ b/Makefile.in @@ -0,0 +1,241 @@ + +DISTNAME = $(pkgname)-$(pkgversion) +INSTALL = install +INSTALL_PROGRAM = $(INSTALL) -m 755 +INSTALL_SCRIPT = $(INSTALL) -m 755 +INSTALL_DATA = $(INSTALL) -m 644 +INSTALL_DIR = $(INSTALL) -d -m 755 +SHELL = /bin/sh +CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 + +objs = arg_parser.o rc.o zutils.o \ + zcat.o zcmp.o zdiff.o zgrep.o ztest.o zupdate.o +zcat_objs = arg_parser.o rc.o zutils.o zcat.o +zcmp_objs = arg_parser.o rc.o zutils.o zcmp.o +zdiff_objs = arg_parser.o rc.o zutils.o zdiff.o +zgrep_objs = arg_parser.o rc.o zutils.o zgrep.o +ztest_objs = arg_parser.o rc.o zutils.o ztest.o +zupdate_objs = arg_parser.o rc.o zupdate.o +programs = zcat zcmp zdiff zgrep ztest zupdate +scripts = zegrep zfgrep + + +.PHONY : all install install-bin install-info install-man \ + install-strip install-compress install-strip-compress \ + install-bin-strip install-info-compress install-man-compress \ + uninstall uninstall-bin uninstall-info uninstall-man \ + doc info man check dist clean distclean + +all : $(programs) $(scripts) + +zcat : $(zcat_objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(zcat_objs) + +zcmp : $(zcmp_objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(zcmp_objs) + +zdiff : $(zdiff_objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(zdiff_objs) + +zegrep : zegrep.in + cat $(VPATH)/zegrep.in > $@ + chmod a+x zegrep + +zfgrep : zfgrep.in + cat $(VPATH)/zfgrep.in > $@ + chmod a+x zfgrep + +zgrep : $(zgrep_objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(zgrep_objs) + +ztest : $(ztest_objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(ztest_objs) + +zupdate : $(zupdate_objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(zupdate_objs) + +rc.o : rc.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -DSYSCONFDIR=\"$(sysconfdir)\" -c -o $@ $< + +zdiff.o : zdiff.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DDIFF=\"$(DIFF)\" -c 
-o $@ $< + +zgrep.o : zgrep.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DGREP=\"$(GREP)\" -c -o $@ $< + +%.o : %.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< + +# prevent 'make' from trying to remake source files +$(VPATH)/configure $(VPATH)/Makefile.in $(VPATH)/doc/$(pkgname).texi : ; +%.h %.cc : ; + +$(objs) : Makefile +$(scripts) : Makefile +arg_parser.o : arg_parser.h +rc.o : arg_parser.h rc.h +zcat.o : arg_parser.h rc.h zutils.h recursive.cc zcatgrep.cc +zcmp.o : arg_parser.h rc.h zutils.h zcmpdiff.cc +zdiff.o : arg_parser.h rc.h zutils.h zcmpdiff.cc +zgrep.o : arg_parser.h rc.h zutils.h recursive.cc zcatgrep.cc +ztest.o : arg_parser.h rc.h zutils.h recursive.cc +zupdate.o : arg_parser.h rc.h recursive.cc +zutils.o : rc.h zutils.h + +doc : info man + +info : $(VPATH)/doc/$(pkgname).info + +$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi + cd $(VPATH)/doc && $(MAKEINFO) $(pkgname).texi + +man : $(VPATH)/doc/zcat.1 $(VPATH)/doc/zcmp.1 $(VPATH)/doc/zdiff.1 \ + $(VPATH)/doc/zgrep.1 $(VPATH)/doc/ztest.1 $(VPATH)/doc/zupdate.1 + +$(VPATH)/doc/zcat.1 : zcat + help2man -n 'decompress and concatenate files to standard output' \ + -o $@ --info-page=$(pkgname) ./zcat + +$(VPATH)/doc/zcmp.1 : zcmp + help2man -n 'decompress and compare two files byte by byte' \ + -o $@ --info-page=$(pkgname) ./zcmp + +$(VPATH)/doc/zdiff.1 : zdiff + help2man -n 'decompress and compare two files line by line' \ + -o $@ --info-page=$(pkgname) ./zdiff + +$(VPATH)/doc/zgrep.1 : zgrep + help2man -n 'search compressed files for a regular expression' \ + -o $@ --info-page=$(pkgname) ./zgrep + +$(VPATH)/doc/ztest.1 : ztest + help2man -n 'check the integrity of compressed files' \ + -o $@ --info-page=$(pkgname) ./ztest + +$(VPATH)/doc/zupdate.1 : zupdate + help2man -n 'recompress bzip2, gzip, xz, zstd files to lzip format' \ + -o $@ --info-page=$(pkgname) ./zupdate + +Makefile : $(VPATH)/configure $(VPATH)/Makefile.in + ./config.status + +check : all + @$(VPATH)/testsuite/check.sh 
$(VPATH)/testsuite $(pkgversion) + +install : install-bin install-info install-man +install-strip : install-bin-strip install-info install-man +install-compress : install-bin install-info-compress install-man-compress +install-strip-compress : install-bin-strip install-info-compress install-man-compress + +install-bin : all + if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi + $(INSTALL_PROGRAM) ./zcat "$(DESTDIR)$(bindir)/zcat" + $(INSTALL_PROGRAM) ./zcmp "$(DESTDIR)$(bindir)/zcmp" + $(INSTALL_PROGRAM) ./zdiff "$(DESTDIR)$(bindir)/zdiff" + $(INSTALL_SCRIPT) ./zegrep "$(DESTDIR)$(bindir)/zegrep" + $(INSTALL_SCRIPT) ./zfgrep "$(DESTDIR)$(bindir)/zfgrep" + $(INSTALL_PROGRAM) ./zgrep "$(DESTDIR)$(bindir)/zgrep" + $(INSTALL_PROGRAM) ./ztest "$(DESTDIR)$(bindir)/ztest" + $(INSTALL_PROGRAM) ./zupdate "$(DESTDIR)$(bindir)/zupdate" + if [ ! -e "$(DESTDIR)$(sysconfdir)/$(pkgname).conf" ] ; then \ + if [ ! -d "$(DESTDIR)$(sysconfdir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(sysconfdir)" ; fi ; \ + $(INSTALL_DATA) $(VPATH)/$(pkgname).conf "$(DESTDIR)$(sysconfdir)/$(pkgname).conf" ; \ + fi + +install-bin-strip : all + $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin + +install-info : + if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* + $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi + +install-info-compress : install-info + lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info" + +install-man : + if [ ! 
-d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi + -rm -f "$(DESTDIR)$(mandir)/man1/zcat.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zcmp.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zdiff.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zgrep.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/ztest.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zupdate.1"* + $(INSTALL_DATA) $(VPATH)/doc/zcat.1 "$(DESTDIR)$(mandir)/man1/zcat.1" + $(INSTALL_DATA) $(VPATH)/doc/zcmp.1 "$(DESTDIR)$(mandir)/man1/zcmp.1" + $(INSTALL_DATA) $(VPATH)/doc/zdiff.1 "$(DESTDIR)$(mandir)/man1/zdiff.1" + $(INSTALL_DATA) $(VPATH)/doc/zgrep.1 "$(DESTDIR)$(mandir)/man1/zgrep.1" + $(INSTALL_DATA) $(VPATH)/doc/ztest.1 "$(DESTDIR)$(mandir)/man1/ztest.1" + $(INSTALL_DATA) $(VPATH)/doc/zupdate.1 "$(DESTDIR)$(mandir)/man1/zupdate.1" + +install-man-compress : install-man + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zcat.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zcmp.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zdiff.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zgrep.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/ztest.1" + lzip -v -9 "$(DESTDIR)$(mandir)/man1/zupdate.1" + +uninstall : uninstall-man uninstall-info uninstall-bin + +uninstall-bin : + -rm -f "$(DESTDIR)$(bindir)/zcat" + -rm -f "$(DESTDIR)$(bindir)/zcmp" + -rm -f "$(DESTDIR)$(bindir)/zdiff" + -rm -f "$(DESTDIR)$(bindir)/zegrep" + -rm -f "$(DESTDIR)$(bindir)/zfgrep" + -rm -f "$(DESTDIR)$(bindir)/zgrep" + -rm -f "$(DESTDIR)$(bindir)/ztest" + -rm -f "$(DESTDIR)$(bindir)/zupdate" + -rm -f "$(DESTDIR)$(sysconfdir)/$(pkgname).conf" + +uninstall-info : + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* + +uninstall-man : + -rm -f "$(DESTDIR)$(mandir)/man1/zcat.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zcmp.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zdiff.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zgrep.1"* + -rm -f 
"$(DESTDIR)$(mandir)/man1/ztest.1"* + -rm -f "$(DESTDIR)$(mandir)/man1/zupdate.1"* + +dist : doc + ln -sf $(VPATH) $(DISTNAME) + tar -Hustar --owner=root --group=root -cvf $(DISTNAME).tar \ + $(DISTNAME)/AUTHORS \ + $(DISTNAME)/COPYING \ + $(DISTNAME)/ChangeLog \ + $(DISTNAME)/INSTALL \ + $(DISTNAME)/Makefile.in \ + $(DISTNAME)/NEWS \ + $(DISTNAME)/README \ + $(DISTNAME)/configure \ + $(DISTNAME)/doc/*.1 \ + $(DISTNAME)/doc/$(pkgname).info \ + $(DISTNAME)/doc/$(pkgname).texi \ + $(DISTNAME)/$(pkgname).conf \ + $(DISTNAME)/*.h \ + $(DISTNAME)/*.cc \ + $(DISTNAME)/z*.in \ + $(DISTNAME)/testsuite/check.sh \ + $(DISTNAME)/testsuite/test.txt \ + $(DISTNAME)/testsuite/test.txt.tar \ + $(DISTNAME)/testsuite/zcat_vs.dat \ + $(DISTNAME)/testsuite/test_bad_crc.lz \ + $(DISTNAME)/testsuite/zero_bad_crc.lz \ + $(DISTNAME)/testsuite/zero_bad_crc.gz + rm -f $(DISTNAME) + lzip -v -9 $(DISTNAME).tar + +clean : + -rm -f $(programs) $(scripts) $(objs) + +distclean : clean + -rm -f Makefile config.status *.tar *.tar.lz @@ -0,0 +1,18 @@ +Changes in version 1.13: + +The detection of bzip2 files with no compressed blocks has been fixed. +(Error introduced in version 1.9). + +When zcat, zcmp, zdiff, or zgrep need to try compressed file names, gzip +(.gz) is now tried before bzip2 (.bz2). + +When only one compressed file is passed to zcmp or zdiff, they now try to +compare it with a compressed file of any of the remaining formats if the +corresponding uncompressed file does not exist. + +zcmp now reports EOF on empty file like GNU cmp: +"zcmp: EOF on FILE which is empty". + +File diagnostics in zupdate have been reformatted as 'PROGRAM: FILE: MESSAGE'. + +The variable MAKEINFO has been added to configure and Makefile.in. @@ -0,0 +1,49 @@ +Description + +Zutils is a collection of utilities able to process any combination of +compressed and uncompressed files transparently. If any file given, +including standard input, is compressed, its decompressed content is used. 
+Compressed files are decompressed on the fly; no temporary files are +created. Data format is detected by its identifier string (magic bytes), not +by the file name extension. Empty files are considered uncompressed. + +These utilities are not wrapper scripts but safer and more efficient C++ +programs. In particular the option '--recursive' is very efficient in +those utilities supporting it. + +The utilities provided are zcat, zcmp, zdiff, zgrep, ztest, and zupdate. +The formats supported are bzip2, gzip, lzip, xz, and zstd. +Zutils uses external compressors. The compressor to be used for each format +is configurable at runtime. + +zcat, zcmp, zdiff, and zgrep are improved replacements for the shell scripts +provided by GNU gzip. ztest is unique to zutils. zupdate is similar to +gzip's znew. + +NOTE: Bzip2 and lzip provide well-defined values of exit status, which makes +them safe to use with zutils. Gzip and xz may return ambiguous warning +values, making them less reliable back ends for zutils. Zstd currently does +not even document its exit status in its man page. + +FORMAT NOTE 1: The option '--format' allows the processing of a subset +of formats in recursive mode and when trying compressed file names. For +example, use the following command to search for the string 'foo' in +gzip and lzip files only: +'zgrep foo -r --format=gz,lz somedir somefile.tar'. + +FORMAT NOTE 2: The standard POSIX compress format (.Z) is obsolete and is +only supported through gzip. For this to work, the gzip program used (for +example GNU gzip) must be able to decompress .Z files. + +LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have +been compressed. Decompressed is used to refer to data which have undergone +the process of decompression. + + +Copyright (C) 2009-2024 Antonio Diaz Diaz. + +This file is free documentation: you have unlimited permission to copy, +distribute, and modify it. 
+ +The file Makefile.in is a data file used by configure to produce the Makefile. +It has the same copyright owner and permissions that configure itself. diff --git a/arg_parser.cc b/arg_parser.cc new file mode 100644 index 0000000..0c04d8e --- /dev/null +++ b/arg_parser.cc @@ -0,0 +1,197 @@ +/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version) + Copyright (C) 2006-2024 Antonio Diaz Diaz. + + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#include <cstring> +#include <string> +#include <vector> + +#include "arg_parser.h" + + +bool Arg_parser::parse_long_option( const char * const opt, const char * const arg, + const Option options[], int & argind ) + { + unsigned len; + int index = -1; + bool exact = false, ambig = false; + + for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; + + // Test all long options for either exact match or abbreviated matches. 
+ for( int i = 0; options[i].code != 0; ++i ) + if( options[i].long_name && + std::strncmp( options[i].long_name, &opt[2], len ) == 0 ) + { + if( std::strlen( options[i].long_name ) == len ) // Exact match found + { index = i; exact = true; break; } + else if( index < 0 ) index = i; // First nonexact match found + else if( options[index].code != options[i].code || + options[index].has_arg != options[i].has_arg ) + ambig = true; // Second or later nonexact match found + } + + if( ambig && !exact ) + { + error_ = "option '"; error_ += opt; error_ += "' is ambiguous"; + return false; + } + + if( index < 0 ) // nothing found + { + error_ = "unrecognized option '"; error_ += opt; error_ += '\''; + return false; + } + + ++argind; + data.push_back( Record( options[index].code, options[index].long_name ) ); + + if( opt[len+2] ) // '--<long_option>=<argument>' syntax + { + if( options[index].has_arg == no ) + { + error_ = "option '--"; error_ += options[index].long_name; + error_ += "' doesn't allow an argument"; + return false; + } + if( options[index].has_arg == yes && !opt[len+3] ) + { + error_ = "option '--"; error_ += options[index].long_name; + error_ += "' requires an argument"; + return false; + } + data.back().argument = &opt[len+3]; + return true; + } + + if( options[index].has_arg == yes ) + { + if( !arg || !arg[0] ) + { + error_ = "option '--"; error_ += options[index].long_name; + error_ += "' requires an argument"; + return false; + } + ++argind; data.back().argument = arg; + return true; + } + + return true; + } + + +bool Arg_parser::parse_short_option( const char * const opt, const char * const arg, + const Option options[], int & argind ) + { + int cind = 1; // character index in opt + + while( cind > 0 ) + { + int index = -1; + const unsigned char c = opt[cind]; + + if( c != 0 ) + for( int i = 0; options[i].code; ++i ) + if( c == options[i].code ) + { index = i; break; } + + if( index < 0 ) + { + error_ = "invalid option -- '"; error_ += c; error_ += '\''; 
+ return false; + } + + data.push_back( Record( c ) ); + if( opt[++cind] == 0 ) { ++argind; cind = 0; } // opt finished + + if( options[index].has_arg != no && cind > 0 && opt[cind] ) + { + data.back().argument = &opt[cind]; ++argind; cind = 0; + } + else if( options[index].has_arg == yes ) + { + if( !arg || !arg[0] ) + { + error_ = "option requires an argument -- '"; error_ += c; + error_ += '\''; + return false; + } + data.back().argument = arg; ++argind; cind = 0; + } + } + return true; + } + + +Arg_parser::Arg_parser( const int argc, const char * const argv[], + const Option options[], const bool in_order ) + { + if( argc < 2 || !argv || !options ) return; + + std::vector< const char * > non_options; // skipped non-options + int argind = 1; // index in argv + + while( argind < argc ) + { + const unsigned char ch1 = argv[argind][0]; + const unsigned char ch2 = ch1 ? argv[argind][1] : 0; + + if( ch1 == '-' && ch2 ) // we found an option + { + const char * const opt = argv[argind]; + const char * const arg = ( argind + 1 < argc ) ? 
argv[argind+1] : 0; + if( ch2 == '-' ) + { + if( !argv[argind][2] ) { ++argind; break; } // we found "--" + else if( !parse_long_option( opt, arg, options, argind ) ) break; + } + else if( !parse_short_option( opt, arg, options, argind ) ) break; + } + else + { + if( in_order ) data.push_back( Record( argv[argind++] ) ); + else non_options.push_back( argv[argind++] ); + } + } + if( !error_.empty() ) data.clear(); + else + { + for( unsigned i = 0; i < non_options.size(); ++i ) + data.push_back( Record( non_options[i] ) ); + while( argind < argc ) + data.push_back( Record( argv[argind++] ) ); + } + } + + +Arg_parser::Arg_parser( const char * const opt, const char * const arg, + const Option options[] ) + { + if( !opt || !opt[0] || !options ) return; + + if( opt[0] == '-' && opt[1] ) // we found an option + { + int argind = 1; // dummy + if( opt[1] == '-' ) + { if( opt[2] ) parse_long_option( opt, arg, options, argind ); } + else + parse_short_option( opt, arg, options, argind ); + if( !error_.empty() ) data.clear(); + } + else data.push_back( Record( opt ) ); + } diff --git a/arg_parser.h b/arg_parser.h new file mode 100644 index 0000000..1eeec9a --- /dev/null +++ b/arg_parser.h @@ -0,0 +1,110 @@ +/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version) + Copyright (C) 2006-2024 Antonio Diaz Diaz. + + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +/* Arg_parser reads the arguments in 'argv' and creates a number of + option codes, option arguments, and non-option arguments. + + In case of error, 'error' returns a non-empty error message. + + 'options' is an array of 'struct Option' terminated by an element + containing a code which is zero. A null long_name means a short-only + option. A code value outside the unsigned char range means a long-only + option. + + Arg_parser normally makes it appear as if all the option arguments + were specified before all the non-option arguments for the purposes + of parsing, even if the user of your program intermixed option and + non-option arguments. If you want the arguments in the exact order + the user typed them, call 'Arg_parser' with 'in_order' = true. + + The argument '--' terminates all options; any following arguments are + treated as non-option arguments, even if they begin with a hyphen. + + The syntax for optional option arguments is '-<short_option><argument>' + (without whitespace), or '--<long_option>=<argument>'. 
+*/ + +class Arg_parser + { +public: + enum Has_arg { no, yes, maybe }; + + struct Option + { + int code; // Short option letter or code ( code != 0 ) + const char * long_name; // Long option name (maybe null) + Has_arg has_arg; + }; + +private: + struct Record + { + int code; + std::string parsed_name; + std::string argument; + explicit Record( const unsigned char c ) + : code( c ), parsed_name( "-" ) { parsed_name += c; } + Record( const int c, const char * const long_name ) + : code( c ), parsed_name( "--" ) { parsed_name += long_name; } + explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {} + }; + + const std::string empty_arg; + std::string error_; + std::vector< Record > data; + + bool parse_long_option( const char * const opt, const char * const arg, + const Option options[], int & argind ); + bool parse_short_option( const char * const opt, const char * const arg, + const Option options[], int & argind ); + +public: + Arg_parser( const int argc, const char * const argv[], + const Option options[], const bool in_order = false ); + + // Restricted constructor. Parses a single token and argument (if any). + Arg_parser( const char * const opt, const char * const arg, + const Option options[] ); + + const std::string & error() const { return error_; } + + // The number of arguments parsed. May be different from argc. + int arguments() const { return data.size(); } + + /* If code( i ) is 0, argument( i ) is a non-option. + Else argument( i ) is the option's argument (or empty). */ + int code( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].code; + else return 0; + } + + // Full name of the option parsed (short or long). 
+ const std::string & parsed_name( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].parsed_name; + else return empty_arg; + } + + const std::string & argument( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].argument; + else return empty_arg; + } + }; diff --git a/configure b/configure new file mode 100755 index 0000000..6bf4999 --- /dev/null +++ b/configure @@ -0,0 +1,212 @@ +#! /bin/sh +# configure script for Zutils - Utilities dealing with compressed files +# Copyright (C) 2009-2024 Antonio Diaz Diaz. +# +# This configure script is free software: you have unlimited permission +# to copy, distribute, and modify it. + +pkgname=zutils +pkgversion=1.13 +srctrigger=doc/${pkgname}.texi + +# clear some things potentially inherited from environment. +LC_ALL=C +export LC_ALL +srcdir= +prefix=/usr/local +exec_prefix='$(prefix)' +bindir='$(exec_prefix)/bin' +datarootdir='$(prefix)/share' +infodir='$(datarootdir)/info' +mandir='$(datarootdir)/man' +sysconfdir='$(prefix)/etc' +CXX=g++ +CPPFLAGS= +CXXFLAGS='-Wall -W -O2' +LDFLAGS= +MAKEINFO=makeinfo +DIFF=diff +GREP=grep + +# checking whether we are using GNU C++. +/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; } + +# Loop over all args +args= +no_create= +while [ $# != 0 ] ; do + + # Get the first arg, and shuffle + option=$1 ; arg2=no + shift + + # Add the argument quoted to args + if [ -z "${args}" ] ; then args="\"${option}\"" + else args="${args} \"${option}\"" ; fi + + # Split out the argument for options that take them + case ${option} in + *=*) optarg=`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'` ;; + esac + + # Process the options + case ${option} in + --help | -h) + echo "Usage: $0 [OPTION]... [VAR=VALUE]..." + echo + echo "To assign makefile variables (e.g., CXX, CXXFLAGS...), specify them as" + echo "arguments to configure in the form VAR=VALUE." 
+ echo + echo "Options and variables: [defaults in brackets]" + echo " -h, --help display this help and exit" + echo " -V, --version output version information and exit" + echo " --srcdir=DIR find the source code in DIR [. or ..]" + echo " --prefix=DIR install into DIR [${prefix}]" + echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]" + echo " --bindir=DIR user executables directory [${bindir}]" + echo " --datarootdir=DIR base directory for doc and data [${datarootdir}]" + echo " --infodir=DIR info files directory [${infodir}]" + echo " --mandir=DIR man pages directory [${mandir}]" + echo " --sysconfdir=DIR read-only single-machine data directory [${sysconfdir}]" + echo " CXX=COMPILER C++ compiler to use [${CXX}]" + echo " CPPFLAGS=OPTIONS command-line options for the preprocessor [${CPPFLAGS}]" + echo " CXXFLAGS=OPTIONS command-line options for the C++ compiler [${CXXFLAGS}]" + echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS" + echo " LDFLAGS=OPTIONS command-line options for the linker [${LDFLAGS}]" + echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]" + echo " DIFF=NAME diff program to use with zdiff [${DIFF}]" + echo " GREP=NAME grep program to use with zgrep [${GREP}]" + echo + exit 0 ;; + --version | -V) + echo "Configure script for ${pkgname} version ${pkgversion}" + exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + --sysconfdir) sysconfdir=$1 ; arg2=yes ;; + + --srcdir=*) srcdir=${optarg} ;; + --prefix=*) prefix=${optarg} ;; + --exec-prefix=*) exec_prefix=${optarg} ;; + --bindir=*) bindir=${optarg} ;; + --datarootdir=*) datarootdir=${optarg} ;; + --infodir=*) infodir=${optarg} ;; + --mandir=*) mandir=${optarg} ;; + --sysconfdir=*) sysconfdir=${optarg} ;; + 
--no-create) no_create=yes ;; + + CXX=*) CXX=${optarg} ;; + CPPFLAGS=*) CPPFLAGS=${optarg} ;; + CXXFLAGS=*) CXXFLAGS=${optarg} ;; + CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;; + LDFLAGS=*) LDFLAGS=${optarg} ;; + MAKEINFO=*) MAKEINFO=${optarg} ;; + DIFF=*) DIFF=${optarg} ;; + GREP=*) GREP=${optarg} ;; + + --*) + echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; + *=* | *-*-*) ;; + *) + echo "configure: unrecognized option: '${option}'" 1>&2 + echo "Try 'configure --help' for more information." 1>&2 + exit 1 ;; + esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to '${option}'" 1>&2 + exit 1 + fi + fi +done + +# Find the source code, if location was not specified. +srcdirtext= +if [ -z "${srcdir}" ] ; then + srcdirtext="or . or .." ; srcdir=. + if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi + if [ ! -r "${srcdir}/${srctrigger}" ] ; then + ## the sed command below emulates the dirname command + srcdir=`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + fi +fi + +if [ ! -r "${srcdir}/${srctrigger}" ] ; then + echo "configure: Can't find source code in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: (At least ${srctrigger} is missing)." 1>&2 + exit 1 +fi + +# Set srcdir to . if that's what it is. +if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi + +echo +if [ -z "${no_create}" ] ; then + echo "creating config.status" + rm -f config.status + cat > config.status << EOF +#! /bin/sh +# This file was generated automatically by configure. Don't edit. +# Run this file to recreate the current configuration. +# +# This script is free software: you have unlimited permission +# to copy, distribute, and modify it. 
+ +exec /bin/sh "$0" ${args} --no-create +EOF + chmod +x config.status +fi + +echo "creating Makefile" +echo "VPATH = ${srcdir}" +echo "prefix = ${prefix}" +echo "exec_prefix = ${exec_prefix}" +echo "bindir = ${bindir}" +echo "datarootdir = ${datarootdir}" +echo "infodir = ${infodir}" +echo "mandir = ${mandir}" +echo "sysconfdir = ${sysconfdir}" +echo "CXX = ${CXX}" +echo "CPPFLAGS = ${CPPFLAGS}" +echo "CXXFLAGS = ${CXXFLAGS}" +echo "LDFLAGS = ${LDFLAGS}" +echo "MAKEINFO = ${MAKEINFO}" +echo "DIFF = ${DIFF}" +echo "GREP = ${GREP}" +rm -f Makefile +cat > Makefile << EOF +# Makefile for Zutils - Utilities dealing with compressed files +# Copyright (C) 2009-2024 Antonio Diaz Diaz. +# This file was generated automatically by configure. Don't edit. +# +# This Makefile is free software: you have unlimited permission +# to copy, distribute, and modify it. + +pkgname = ${pkgname} +pkgversion = ${pkgversion} +VPATH = ${srcdir} +prefix = ${prefix} +exec_prefix = ${exec_prefix} +bindir = ${bindir} +datarootdir = ${datarootdir} +infodir = ${infodir} +mandir = ${mandir} +sysconfdir = ${sysconfdir} +CXX = ${CXX} +CPPFLAGS = ${CPPFLAGS} +CXXFLAGS = ${CXXFLAGS} +LDFLAGS = ${LDFLAGS} +MAKEINFO = ${MAKEINFO} +DIFF = ${DIFF} +GREP = ${GREP} +EOF +cat "${srcdir}/Makefile.in" >> Makefile + +echo "OK. Now you can run make." diff --git a/doc/zcat.1 b/doc/zcat.1 new file mode 100644 index 0000000..a27254b --- /dev/null +++ b/doc/zcat.1 @@ -0,0 +1,121 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2. +.TH ZCAT "1" "January 2024" "zutils 1.13" "User Commands" +.SH NAME +zcat \- decompress and concatenate files to standard output +.SH SYNOPSIS +.B zcat +[\fI\,options\/\fR] [\fI\,files\/\fR] +.SH DESCRIPTION +zcat copies each file argument to standard output in sequence. If any +file given is compressed, its decompressed content is copied. 
If a file +given does not exist, and its name does not end with one of the known +extensions, zcat tries the compressed file names corresponding to the +formats supported until one is found. If a file fails to decompress, zcat +continues copying the rest of the files. +.PP +If a file is specified as '\-', data are read from standard input, +decompressed if needed, and sent to standard output. Data read from +standard input must be of the same type; all uncompressed or all in the +same compressed format. +.PP +If no files are specified, recursive searches examine the current +working directory, and nonrecursive searches read standard input. +.PP +The formats supported are bzip2, gzip, lzip, xz, and zstd. +.PP +Exit status is 0 if no errors occurred, 1 otherwise. +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-A\fR, \fB\-\-show\-all\fR +equivalent to '\-vET' +.TP +\fB\-b\fR, \fB\-\-number\-nonblank\fR +number nonblank output lines +.TP +\fB\-e\fR +equivalent to '\-vE' +.TP +\fB\-E\fR, \fB\-\-show\-ends\fR +display '$' at end of each line +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-n\fR, \fB\-\-number\fR +number all output lines +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-O\fR, \fB\-\-force\-format=\fR<fmt> +force the input format +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-r\fR, \fB\-\-recursive\fR +operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +recursively follow symbolic links +.TP +\fB\-s\fR, \fB\-\-squeeze\-blank\fR +never more than one single blank line +.TP +\fB\-t\fR +equivalent to '\-vT' +.TP +\fB\-T\fR, \fB\-\-show\-tabs\fR +display TAB characters as '^I' +.TP +\fB\-v\fR, \fB\-\-show\-nonprinting\fR +use '^' and 'M\-' notation, except for LF and TAB +.TP +\fB\-\-verbose\fR +verbose mode (show error 
messages) +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.TP +\fB\-\-zst=\fR<command> +set compressor and options for zstd format +.PP +Valid formats for options '\-M' and '\-O' are 'bz2', 'gz', 'lz', 'xz', 'zst', +and 'un' for uncompressed. +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2024 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B zcat +is maintained as a Texinfo manual. If the +.B info +and +.B zcat +programs are properly installed at your site, the command +.IP +.B info zutils +.PP +should give you access to the complete manual. diff --git a/doc/zcmp.1 b/doc/zcmp.1 new file mode 100644 index 0000000..09af2a5 --- /dev/null +++ b/doc/zcmp.1 @@ -0,0 +1,108 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2. +.TH ZCMP "1" "January 2024" "zutils 1.13" "User Commands" +.SH NAME +zcmp \- decompress and compare two files byte by byte +.SH SYNOPSIS +.B zcmp +[\fI\,options\/\fR] \fI\,file1 \/\fR[\fI\,file2\/\fR] +.SH DESCRIPTION +zcmp compares two files and, if they differ, writes to standard output the +first byte and line number where they differ. Bytes and lines are numbered +starting with 1. A hyphen '\-' used as a file argument means standard input. +If any file given is compressed, its decompressed content is used. Compressed +files are decompressed on the fly; no temporary files are created. +.PP +The formats supported are bzip2, gzip, lzip, xz, and zstd. 
+.PP +zcmp compares file1 to file2. The standard input is used only if file1 or +file2 refers to standard input. If file2 is omitted zcmp tries to compare +file1 with the corresponding uncompressed file (if file1 is compressed), and +then with the corresponding compressed files of the remaining formats until +one is found. +.PP +Exit status is 0 if inputs are identical, 1 if different, 2 if trouble. +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-b\fR, \fB\-\-print\-bytes\fR +print differing bytes +.TP +\fB\-H\fR, \fB\-\-hexadecimal\fR +print hexadecimal values instead of octal +.TP +\fB\-i\fR, \fB\-\-ignore\-initial=\fR<n>[:<n2>] +ignore differences in the first <n> bytes +.TP +\fB\-l\fR, \fB\-\-list\fR +list position, value of all differing bytes +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-n\fR, \fB\-\-bytes=\fR<n> +compare at most <n> bytes +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-O\fR, \fB\-\-force\-format\fR=\fI\,[\/\fR<f1>][,<f2>] +force one or both input formats +.TP +\fB\-q\fR, \fB\-\-quiet\fR, \fB\-\-silent\fR +suppress diagnostics written to stderr +.TP +\fB\-s\fR, \fB\-\-script\fR +suppress messages about file differences +.TP +\fB\-v\fR, \fB\-\-verbose\fR +verbose mode (opposite of \fB\-\-quiet\fR) +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.TP +\fB\-\-zst=\fR<command> +set compressor and options for zstd format +.PP +Valid formats for options '\-M' and '\-O' are 'bz2', 'gz', 'lz', 'xz', 'zst', +and 'un' for uncompressed. 
+.PP +Byte counts given as arguments to options may be expressed in decimal, +hexadecimal, or octal (using the same syntax as integer constants in C++), +and may be followed by a multiplier: k = kB = 10^3 = 1000, +Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc. +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2024 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B zcmp +is maintained as a Texinfo manual. If the +.B info +and +.B zcmp +programs are properly installed at your site, the command +.IP +.B info zutils +.PP +should give you access to the complete manual. diff --git a/doc/zdiff.1 b/doc/zdiff.1 new file mode 100644 index 0000000..bbcdc94 --- /dev/null +++ b/doc/zdiff.1 @@ -0,0 +1,139 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2. +.TH ZDIFF "1" "January 2024" "zutils 1.13" "User Commands" +.SH NAME +zdiff \- decompress and compare two files line by line +.SH SYNOPSIS +.B zdiff +[\fI\,options\/\fR] \fI\,file1 \/\fR[\fI\,file2\/\fR] +.SH DESCRIPTION +zdiff compares two files and, if they differ, writes to standard output the +differences line by line. A hyphen '\-' used as a file argument means standard +input. If any file given is compressed, its decompressed content is used. +zdiff is a front end to the program diff and has the limitation that messages +from diff refer to temporary file names instead of those specified. +.PP +\&'zdiff \fB\-v\fR \fB\-V\fR' prints the version of the diff program used. +.PP +The formats supported are bzip2, gzip, lzip, xz, and zstd. +.PP +zdiff compares file1 to file2. The standard input is used only if file1 or +file2 refers to standard input. 
If file2 is omitted zdiff tries to compare +file1 with the corresponding uncompressed file (if file1 is compressed), and +then with the corresponding compressed files of the remaining formats until +one is found. +.PP +Exit status is 0 if inputs are identical, 1 if different, 2 if trouble. +Some options only work if the diff program used supports them. +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-a\fR, \fB\-\-text\fR +treat all files as text +.TP +\fB\-b\fR, \fB\-\-ignore\-space\-change\fR +ignore changes in the amount of white space +.TP +\fB\-B\fR, \fB\-\-ignore\-blank\-lines\fR +ignore changes whose lines are all blank +.TP +\fB\-c\fR +use the context output format +.TP +\fB\-C\fR, \fB\-\-context=\fR<n> +same as \fB\-c\fR but use <n> lines of context +.TP +\fB\-d\fR, \fB\-\-minimal\fR +try hard to find a smaller set of changes +.TP +\fB\-E\fR, \fB\-\-ignore\-tab\-expansion\fR +ignore changes due to tab expansion +.TP +\fB\-i\fR, \fB\-\-ignore\-case\fR +ignore case differences +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-O\fR, \fB\-\-force\-format\fR=\fI\,[\/\fR<f1>][,<f2>] +force one or both input formats +.TP +\fB\-p\fR, \fB\-\-show\-c\-function\fR +show which C function each change is in +.TP +\fB\-q\fR, \fB\-\-brief\fR +output only whether files differ +.TP +\fB\-s\fR, \fB\-\-report\-identical\-files\fR +report when two files are identical +.TP +\fB\-t\fR, \fB\-\-expand\-tabs\fR +expand tabs to spaces in output +.TP +\fB\-T\fR, \fB\-\-initial\-tab\fR +make tabs line up by prepending a tab +.TP +\fB\-u\fR +use the unified output format +.TP +\fB\-U\fR, \fB\-\-unified=\fR<n> +same as \fB\-u\fR but use <n> lines of context +.TP +\fB\-v\fR, \fB\-\-verbose\fR +verbose mode (for \fB\-\-version\fR) +.TP +\fB\-w\fR, 
\fB\-\-ignore\-all\-space\fR +ignore all white space +.TP +\fB\-W\fR, \fB\-\-width=\fR<n> +output at most <n> print columns (for \fB\-y\fR) +.TP +\fB\-y\fR, \fB\-\-side\-by\-side\fR +output in two columns +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.TP +\fB\-\-zst=\fR<command> +set compressor and options for zstd format +.PP +Valid formats for options '\-M' and '\-O' are 'bz2', 'gz', 'lz', 'xz', 'zst', +and 'un' for uncompressed. +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2024 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B zdiff +is maintained as a Texinfo manual. If the +.B info +and +.B zdiff +programs are properly installed at your site, the command +.IP +.B info zutils +.PP +should give you access to the complete manual. diff --git a/doc/zgrep.1 b/doc/zgrep.1 new file mode 100644 index 0000000..605f1cb --- /dev/null +++ b/doc/zgrep.1 @@ -0,0 +1,194 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2. +.TH ZGREP "1" "January 2024" "zutils 1.13" "User Commands" +.SH NAME +zgrep \- search compressed files for a regular expression +.SH SYNOPSIS +.B zgrep +[\fI\,options\/\fR] \fI\,<pattern> \/\fR[\fI\,files\/\fR] +.SH DESCRIPTION +zgrep is a front end to the program grep that allows transparent search +on any combination of compressed and uncompressed files. If any file +given is compressed, its decompressed content is used. 
If a file given +does not exist, and its name does not end with one of the known +extensions, zgrep tries the compressed file names corresponding to the +formats supported until one is found. If a file fails to decompress, zgrep +continues searching the rest of the files. +.PP +If a file is specified as '\-', data are read from standard input, +decompressed if needed, and fed to grep. Data read from standard input +must be of the same type; all uncompressed or all in the same +compressed format. +.PP +If no files are specified, recursive searches examine the current +working directory, and nonrecursive searches read standard input. +.PP +\&'zgrep \fB\-\-verbose\fR \fB\-V\fR' prints the version of the grep program used. +.PP +The formats supported are bzip2, gzip, lzip, xz, and zstd. +.PP +Exit status is 0 if match, 1 if no match, 2 if trouble. +Some options only work if the grep program used supports them. +.SH OPTIONS +.TP +\fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-a\fR, \fB\-\-text\fR +treat all files as text +.TP +\fB\-A\fR, \fB\-\-after\-context=\fR<n> +print <n> lines of trailing context +.TP +\fB\-b\fR, \fB\-\-byte\-offset\fR +print the byte offset of each line +.TP +\fB\-B\fR, \fB\-\-before\-context=\fR<n> +print <n> lines of leading context +.TP +\fB\-c\fR, \fB\-\-count\fR +only print a count of matching lines per file +.TP +\fB\-C\fR, \fB\-\-context=\fR<n> +print <n> lines of output context +.TP +\fB\-\-color[=\fR<when>] +show matched strings in color +.TP +\fB\-e\fR, \fB\-\-regexp=\fR<pattern> +use <pattern> as the pattern to match +.TP +\fB\-E\fR, \fB\-\-extended\-regexp\fR +<pattern> is an extended regular expression +.TP +\fB\-f\fR, \fB\-\-file=\fR<file> +obtain patterns from <file> +.TP +\fB\-F\fR, \fB\-\-fixed\-strings\fR +<pattern> is a set of newline\-separated strings +.TP +\fB\-G\fR, \fB\-\-basic\-regexp\fR +<pattern> is a basic regular expression (default) +.TP 
+\fB\-h\fR, \fB\-\-no\-filename\fR +suppress the prefixing file name on output +.TP +\fB\-H\fR, \fB\-\-with\-filename\fR +print the file name for each match +.TP +\fB\-i\fR, \fB\-\-ignore\-case\fR +ignore case distinctions +.TP +\fB\-I\fR +ignore binary files +.TP +\fB\-l\fR, \fB\-\-files\-with\-matches\fR +only print names of files containing matches +.TP +\fB\-L\fR, \fB\-\-files\-without\-match\fR +only print names of files containing no matches +.TP +\fB\-\-label=\fR<label> +use <label> as file name for standard input +.TP +\fB\-\-line\-buffered\fR +flush output on every line +.TP +\fB\-m\fR, \fB\-\-max\-count=\fR<n> +stop after <n> matches +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-n\fR, \fB\-\-line\-number\fR +print the line number of each line +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-o\fR, \fB\-\-only\-matching\fR +show only the part of a line matching <pattern> +.TP +\fB\-O\fR, \fB\-\-force\-format=\fR<fmt> +force the input format +.TP +\fB\-P\fR, \fB\-\-perl\-regexp\fR +<pattern> is a Perl regular expression +.TP +\fB\-q\fR, \fB\-\-quiet\fR, \fB\-\-silent\fR +suppress all messages +.TP +\fB\-r\fR, \fB\-\-recursive\fR +operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +recursively follow symbolic links +.TP +\fB\-s\fR, \fB\-\-no\-messages\fR +suppress error messages +.TP +\fB\-T\fR, \fB\-\-initial\-tab\fR +make tabs line up (if needed) +.TP +\fB\-U\fR, \fB\-\-binary\fR +don't strip CR characters at EOL (DOS/Windows) +.TP +\fB\-v\fR, \fB\-\-invert\-match\fR +select non\-matching lines +.TP +\fB\-\-verbose\fR +verbose mode (show error messages) +.TP +\fB\-w\fR, \fB\-\-word\-regexp\fR +match only whole words +.TP +\fB\-x\fR, \fB\-\-line\-regexp\fR +match only whole lines +.TP +\fB\-Z\fR, \fB\-\-null\fR +print 0 byte (ASCII NUL) after file name +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP 
+\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.TP +\fB\-\-zst=\fR<command> +set compressor and options for zstd format +.PP +Valid formats for options '\-M' and '\-O' are 'bz2', 'gz', 'lz', 'xz', 'zst', +and 'un' for uncompressed. +.PP +Numbers may be followed by a multiplier: k = kB = 10^3 = 1000, +Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc. +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2024 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B zgrep +is maintained as a Texinfo manual. If the +.B info +and +.B zgrep +programs are properly installed at your site, the command +.IP +.B info zutils +.PP +should give you access to the complete manual. diff --git a/doc/ztest.1 b/doc/ztest.1 new file mode 100644 index 0000000..caa0b2a --- /dev/null +++ b/doc/ztest.1 @@ -0,0 +1,102 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2. +.TH ZTEST "1" "January 2024" "zutils 1.13" "User Commands" +.SH NAME +ztest \- check the integrity of compressed files +.SH SYNOPSIS +.B ztest +[\fI\,options\/\fR] [\fI\,files\/\fR] +.SH DESCRIPTION +ztest checks the integrity of the compressed files specified. It +also warns if an uncompressed file has a compressed file name extension, or +if a compressed file has a wrong compressed extension. Uncompressed files +are otherwise ignored. If a file is specified as '\-', the integrity of +compressed data read from standard input is checked.
Data read from +standard input must be all in the same compressed format. If a file fails to +decompress, does not exist, can't be opened, or is a terminal, ztest +continues testing the rest of the files. A final diagnostic is shown at +verbosity level 1 or higher if any file fails the test when testing multiple +files. +.PP +If no files are specified, recursive searches examine the current +working directory, and nonrecursive searches read standard input. +.PP +The formats supported are bzip2, gzip, lzip, xz, and zstd. +.PP +Note that error detection in the xz format is broken. First, some xz files +lack integrity information. Second, not all xz decompressors can check the +integrity of all xz files. Third, section 2.1.1.2 'Stream Flags' of the +xz format specification allows xz decompressors to produce garbage output +without issuing any warning. Therefore, xz files can't always be checked as +reliably as files in the other formats can. +.PP +Exit status is 0 if all compressed files check OK, 1 if environmental +problems (file not found, invalid command\-line options, I/O errors, etc), +2 if any compressed file is corrupt or invalid, or if any file has an +incorrect file name extension. 
+.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-O\fR, \fB\-\-force\-format=\fR<fmt> +force the input format +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-r\fR, \fB\-\-recursive\fR +operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +recursively follow symbolic links +.TP +\fB\-v\fR, \fB\-\-verbose\fR +be verbose (a 2nd \fB\-v\fR gives more) +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.TP +\fB\-\-zst=\fR<command> +set compressor and options for zstd format +.PP +Valid formats for options '\-M' and '\-O' are 'bz2', 'gz', 'lz', 'xz', and 'zst'. +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2024 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B ztest +is maintained as a Texinfo manual. If the +.B info +and +.B ztest +programs are properly installed at your site, the command +.IP +.B info zutils +.PP +should give you access to the complete manual. diff --git a/doc/zupdate.1 b/doc/zupdate.1 new file mode 100644 index 0000000..c50a2df --- /dev/null +++ b/doc/zupdate.1 @@ -0,0 +1,122 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2. 
+.TH ZUPDATE "1" "January 2024" "zutils 1.13" "User Commands" +.SH NAME +zupdate \- recompress bzip2, gzip, xz, zstd files to lzip format +.SH SYNOPSIS +.B zupdate +[\fI\,options\/\fR] [\fI\,files\/\fR] +.SH DESCRIPTION +zupdate recompresses files from bzip2, gzip, xz, and zstd formats to lzip +format. Each original is compared with the new file and then deleted. +Only regular files with standard file name extensions are recompressed, +other files are ignored. Compressed files are decompressed and then +recompressed on the fly; no temporary files are created. The lzip format +is chosen as destination because it is the most appropriate for +long\-term archiving. +.PP +If no files are specified, recursive searches examine the current +working directory, and nonrecursive searches do nothing. +.PP +If the lzip\-compressed version of a file already exists, the file is skipped +unless the option '\-\-force' is given. In this case, if the comparison with +the existing lzip version fails, an error is returned and the original file +is not deleted. The operation of zupdate is meant to be safe and not cause +any data loss. Therefore, existing lzip\-compressed files are never +overwritten nor deleted. +.PP +The names of the original files must have one of the following extensions: +.PP +\&'.bz2', '.gz', '.xz', '.zst', or '.Z', which are recompressed to '.lz'. +.PP +\&'.tbz', '.tbz2', '.tgz', '.txz', or '.tzst', which are recompressed to '.tlz'. +.PP +Exit status is 0 if all the compressed files were successfully recompressed +(if needed), compared, and deleted (if requested). 1 if a non\-fatal error +occurred (file not found or not regular, or has invalid format, or can't be +deleted). 2 if a fatal error occurred (invalid command\-line options, +compressor can't be run, or comparison fails). 
+.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-d\fR, \fB\-\-destdir=\fR<dir> +write recompressed files into <dir> +.TP +\fB\-e\fR, \fB\-\-expand\-extensions\fR +expand combined extensions; tgz \fB\-\-\fR> tar.lz +.TP +\fB\-f\fR, \fB\-\-force\fR +don't skip a file even if the .lz exists +.TP +\fB\-i\fR, \fB\-\-ignore\-errors\fR +ignore non\-fatal errors +.TP +\fB\-k\fR, \fB\-\-keep\fR +keep (don't delete) input files +.TP +\fB\-l\fR, \fB\-\-lzip\-verbose\fR +pass one option \fB\-v\fR to the lzip compressor +.TP +\fB\-M\fR, \fB\-\-format=\fR<list> +process only the formats in <list> +.TP +\fB\-N\fR, \fB\-\-no\-rcfile\fR +don't read runtime configuration file +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-r\fR, \fB\-\-recursive\fR +operate recursively on directories +.TP +\fB\-R\fR, \fB\-\-dereference\-recursive\fR +recursively follow symbolic links +.TP +\fB\-v\fR, \fB\-\-verbose\fR +be verbose (a 2nd \fB\-v\fR gives more) +.TP +\fB\-0\fR .. \fB\-9\fR +set compression level [default 9] +.TP +\fB\-\-bz2=\fR<command> +set compressor and options for bzip2 format +.TP +\fB\-\-gz=\fR<command> +set compressor and options for gzip format +.TP +\fB\-\-lz=\fR<command> +set compressor and options for lzip format +.TP +\fB\-\-xz=\fR<command> +set compressor and options for xz format +.TP +\fB\-\-zst=\fR<command> +set compressor and options for zstd format +.PP +Valid formats for option '\-M' are 'bz2', 'gz', 'lz', 'xz', and 'zst'. +.SH "REPORTING BUGS" +Report bugs to zutils\-bug@nongnu.org +.br +Zutils home page: http://www.nongnu.org/zutils/zutils.html +.SH COPYRIGHT +Copyright \(co 2024 Antonio Diaz Diaz. +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. 
+.SH "SEE ALSO" +The full documentation for +.B zupdate +is maintained as a Texinfo manual. If the +.B info +and +.B zupdate +programs are properly installed at your site, the command +.IP +.B info zutils +.PP +should give you access to the complete manual. diff --git a/doc/zutils.info b/doc/zutils.info new file mode 100644 index 0000000..d7f9cb4 --- /dev/null +++ b/doc/zutils.info @@ -0,0 +1,988 @@ +This is zutils.info, produced by makeinfo version 4.13+ from zutils.texi. + +INFO-DIR-SECTION Compression +START-INFO-DIR-ENTRY +* Zutils: (zutils). Utilities dealing with compressed files +END-INFO-DIR-ENTRY + + +File: zutils.info, Node: Top, Next: Introduction, Up: (dir) + +Zutils Manual +************* + +This manual is for Zutils (version 1.13, 23 January 2024). + +* Menu: + +* Introduction:: Purpose and features of zutils +* Common options:: Options common to all utilities +* Configuration:: The configuration file zutils.conf +* Zcat:: Concatenating compressed files +* Zcmp:: Comparing compressed files byte by byte +* Zdiff:: Comparing compressed files line by line +* Zgrep:: Searching inside compressed files +* Ztest:: Testing the integrity of compressed files +* Zupdate:: Recompressing files to lzip format +* Problems:: Reporting bugs +* Concept index:: Index of concepts + + + Copyright (C) 2009-2024 Antonio Diaz Diaz. + + This manual is free documentation: you have unlimited permission to copy, +distribute, and modify it. + + +File: zutils.info, Node: Introduction, Next: Common options, Prev: Top, Up: Top + +1 Introduction +************** + +Zutils is a collection of utilities able to process any combination of +compressed and uncompressed files transparently. If any file given, +including standard input, is compressed, its decompressed content is used. +Compressed files are decompressed on the fly; no temporary files are +created. Data format is detected by its identifier string (magic bytes), not +by the file name extension. 
Empty files are considered uncompressed. + + These utilities are not wrapper scripts but safer and more efficient C++ +programs. In particular the option '--recursive' is very efficient in those +utilities supporting it. + +The utilities provided are 'zcat', 'zcmp', 'zdiff', 'zgrep', 'ztest', and +'zupdate'. +The formats supported are bzip2, gzip, lzip, xz, and zstd. +Zutils uses external compressors. The compressor to be used for each format +is configurable at runtime. + + 'zcat', 'zcmp', 'zdiff', and 'zgrep' are improved replacements for the +shell scripts provided by GNU gzip. 'ztest' is unique to zutils. 'zupdate' +is similar to gzip's znew. + + When 'zcat', 'zcmp', 'zdiff', or 'zgrep' need to try compressed file +names, the search order is: lzip, gzip, bzip2, zstd, xz. +(FILE.[lz|gz|bz2|zst|xz]). + + NOTE: Bzip2 and lzip provide well-defined values of exit status, which +makes them safe to use with zutils. Gzip and xz may return ambiguous warning +values, making them less reliable back ends for zutils. Zstd currently does +not even document its exit status in its man page. *Note +compressor-requirements::. + + FORMAT NOTE 1: The option '--format' allows the processing of a subset +of formats in recursive mode and when trying compressed file names. For +example, use the following command to search for the string 'foo' in gzip +and lzip files only: 'zgrep foo -r --format=gz,lz somedir somefile.tar'. + + FORMAT NOTE 2: The standard POSIX compress format (.Z) is obsolete and is +only supported through gzip. For this to work, the gzip program used (for +example GNU gzip) must be able to decompress .Z files. + + LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never +have been compressed. Decompressed is used to refer to data which have +undergone the process of decompression. 
+ + +File: zutils.info, Node: Common options, Next: Configuration, Prev: Introduction, Up: Top + +2 Common options +**************** + +The following options are available in all the utilities. Rather than +writing identical descriptions for each of the programs, they are described +here. Remember to prepend './' to any file name beginning with a hyphen, or +use '--'. *Note Argument syntax: (arg_parser)Argument syntax. + +'-h' +'--help' + Print an informative help message describing the options and exit. + 'zgrep' only supports the '--help' form of this option. + +'-V' +'--version' + Print the version number on the standard output and exit. This version + number should be included in all bug reports. In verbose mode, 'zdiff' + and 'zgrep' print also the version of the diff or grep program used + respectively. At verbosity level 1 (2 for 'zdiff' and 'zgrep') or + higher, print also the versions of the compressors used (perhaps + limited by option '--format'). (The compressors used must support the + option '-V' for this to work). + +'-M FORMAT_LIST' +'--format=FORMAT_LIST' + Process only the formats listed in the comma-separated FORMAT_LIST. + Valid formats are 'bz2', 'gz', 'lz', 'xz', 'zst', and 'un' for + 'uncompressed', meaning "any file name without a known extension". + This option excludes files based on extension, instead of format, + because it is more efficient. The exclusion only applies to names + generated automatically (for example when adding extensions to a file + name or when operating recursively on directories). Files given in the + command line are always processed. + + Each format in FORMAT_LIST enables file names with the following + extensions: + + bz2 enables .bz2 .tbz .tbz2 + gz enables .gz .tgz .Z + lz enables .lz .tlz + xz enables .xz .txz + zst enables .zst .tzst + un enables any other file name + +'-N' +'--no-rcfile' + Don't read the runtime configuration file 'zutils.conf'.
+ +'--bz2=COMMAND' +'--gz=COMMAND' +'--lz=COMMAND' +'--xz=COMMAND' +'--zst=COMMAND' + Set program to be used as decompressor for the corresponding format. + COMMAND may include arguments. For example '--lz='plzip --threads=2''. + 'zupdate' uses '--lz' for compression, not for decompression (*note + lz-compressor::). The name of the program can't begin with '-'. These + options override the values set in 'zutils.conf'. The compression + program used must meet three requirements: + + 1. When called with the option '-d' and without file names, it must + read compressed data from the standard input and produce + decompressed data on the standard output. + + 2. If the option '-q' is passed to zutils, the compression program + must also accept it. + + 3. It must return 0 if no errors occurred, and a non-zero value + otherwise. + + + Numbers given as arguments to options may be expressed in decimal, +hexadecimal, or octal (using the same syntax as integer constants in C++), +and may be followed by a multiplier and an optional 'B' for "byte". + + Table of SI and binary prefixes (unit multipliers): + +Prefix Value | Prefix Value +k kilobyte (10^3 = 1000) | Ki kibibyte (2^10 = 1024) +M megabyte (10^6) | Mi mebibyte (2^20) +G gigabyte (10^9) | Gi gibibyte (2^30) +T terabyte (10^12) | Ti tebibyte (2^40) +P petabyte (10^15) | Pi pebibyte (2^50) +E exabyte (10^18) | Ei exbibyte (2^60) +Z zettabyte (10^21) | Zi zebibyte (2^70) +Y yottabyte (10^24) | Yi yobibyte (2^80) +R ronnabyte (10^27) | Ri robibyte (2^90) +Q quettabyte (10^30) | Qi quebibyte (2^100) + + +File: zutils.info, Node: Configuration, Next: Zcat, Prev: Common options, Up: Top + +3 The configuration file 'zutils.conf' +************************************** + +'zutils.conf' is the runtime configuration file for zutils. In it you may +define the compressor name and options to be used for each format. +'zutils.conf' is optional; you don't need to install it in order to run +zutils. 
+ + The compressors specified in the command line override those specified +in 'zutils.conf'. + + You may copy the system 'zutils.conf' file '${sysconfdir}/zutils.conf' +to '$XDG_CONFIG_HOME/zutils.conf' and customize these options as you like. +('XDG_CONFIG_HOME' defaults to '$HOME/.config'). The file syntax is fairly +obvious (and there are further instructions in it): + + 1. Any line beginning with '#' is a comment line. + + 2. Each non-comment line defines the command to be used for the + corresponding format, with the syntax: + <format> = <compressor> [options] + where <format> is one of 'bz2', 'gz', 'lz', 'xz', or 'zst'. + + +File: zutils.info, Node: Zcat, Next: Zcmp, Prev: Configuration, Up: Top + +4 Zcat +****** + +'zcat' copies each FILE argument to standard output in sequence. If any +file given is compressed, its decompressed content is copied. If a file +given does not exist, and its name does not end with one of the known +extensions, 'zcat' tries the compressed file names corresponding to the +formats supported until one is found. *Note search-order::. If a file fails +to decompress, 'zcat' continues copying the rest of the files. + + If a file is specified as '-', data are read from standard input, +decompressed if needed, and sent to standard output. Data read from +standard input must be of the same type; all uncompressed or all in the +same compressed format. + + If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + + The format for running 'zcat' is: + + zcat [OPTIONS] [FILES] + +Exit status is 0 if no errors occurred, 1 otherwise. + + 'zcat' supports the following options: + +'-A' +'--show-all' + Equivalent to '-vET'. + +'-b' +'--number-nonblank' + Number all nonblank output lines, starting with 1. The line count is + unlimited. + +'-e' + Equivalent to '-vE'. + +'-E' +'--show-ends' + Print a '$' after the end of each line. 
+ +'-n' +'--number' + Number all output lines, starting with 1. The line count is unlimited. + +'-O FORMAT' +'--force-format=FORMAT' + Force the compressed format given. Valid values for FORMAT are 'bz2', + 'gz', 'lz', 'xz', 'zst', and 'un' for 'uncompressed'. If this option + is used, the files are passed to the corresponding decompressor (or + transmitted unmodified) without checking their format, and the exact + file name must be given. Other names are not tried. + +'-q' +'--quiet' + Quiet operation. Suppress all messages. + +'-r' +'--recursive' + For each directory operand, read and process all files in that + directory, recursively. Follow symbolic links given in the command + line, but skip symbolic links that are encountered recursively. + +'-R' +'--dereference-recursive' + For each directory operand, read and process all files in that + directory, recursively, following all symbolic links. + +'-s' +'--squeeze-blank' + Replace multiple adjacent blank lines with a single blank line. + +'-t' + Equivalent to '-vT'. + +'-T' +'--show-tabs' + Print TAB characters as '^I'. + +'-v' +'--show-nonprinting' + Print control characters except for LF (newline) and TAB using '^' + notation and precede characters larger than 127 with 'M-' (which + stands for "meta"). + +'--verbose' + Verbose mode. Show error messages. Repeating it increases the verbosity + level. *Note version::. + + + +File: zutils.info, Node: Zcmp, Next: Zdiff, Prev: Zcat, Up: Top + +5 Zcmp +****** + +'zcmp' compares two files and, if they differ, writes to standard output +the first byte and line number where they differ. Bytes and lines are +numbered starting with 1. A hyphen '-' used as a FILE argument means +standard input. If any file given is compressed, its decompressed content +is used. Compressed files are decompressed on the fly; no temporary files +are created. + + The format for running 'zcmp' is: + + zcmp [OPTIONS] FILE1 [FILE2] + +This compares FILE1 to FILE2. 
The standard input is used only if FILE1 or +FILE2 refers to standard input. If FILE2 is omitted 'zcmp' tries to compare +FILE1 with the corresponding uncompressed file (if FILE1 is compressed), +and then with the corresponding compressed files of the remaining formats +until one is found. *Note search-order::. + +An exit status of 0 means no differences were found, 1 means some +differences were found, and 2 means trouble. + + 'zcmp' supports the following options: + +'-b' +'--print-bytes' + Print the values of the differing bytes (in octal by default) followed + by the bytes themselves in printable form. Print control bytes as a '^' + followed by a letter, and precede bytes larger than 127 with 'M-' + (which stands for "meta"). + +'-H' +'--hexadecimal' + Print the values of the differing bytes in hexadecimal instead of + octal. + +'-i SIZE' +'--ignore-initial=SIZE' + Ignore any differences in the first SIZE bytes of the input files. + Treat files with fewer than SIZE bytes as if they were empty. If SIZE + is in the form 'SIZE1:SIZE2', ignore the first SIZE1 bytes of the + first input file and the first SIZE2 bytes of the second input file. + +'-l' +'--list' + Print the byte numbers (in decimal) and values (in octal by default) + of all differing bytes. Bytes are numbered starting with 1. + +'-n COUNT' +'--bytes=COUNT' + Compare at most COUNT input bytes. + +'-O [FORMAT1][,FORMAT2]' +'--force-format=[FORMAT1][,FORMAT2]' + Force the compressed formats given. If FORMAT1 or FORMAT2 is omitted, + the corresponding format is automatically detected. Valid values for + FORMAT are 'bz2', 'gz', 'lz', 'xz', 'zst', and 'un' for + 'uncompressed'. If at least one format is specified with this option, + the file is passed to the corresponding decompressor (or transmitted + unmodified) without checking its format, and the exact file names of + both FILE1 and FILE2 must be given. Other names are not tried. 
+ +'-q' +'--quiet' +'--silent' + Suppress diagnostics written to standard error, even the + 'EOF on <name_of_shorter_file>' diagnostic. Byte differences are still + written to standard output. ('-q' produces no output except byte + differences). + +'-s' +'--script' + Write nothing to standard output or standard error when files differ, + not even the 'EOF on <name_of_shorter_file>' diagnostic; indicate + differing files through exit status only. Diagnostic messages are still + written to standard error when an error is encountered. ('-s' produces + no output except error messages). + +'-v' +'--verbose' + Verbose mode. Undoes the effect of '--quiet'. Further -v's increase + the verbosity level. *Note version::. + + + +File: zutils.info, Node: Zdiff, Next: Zgrep, Prev: Zcmp, Up: Top + +6 Zdiff +******* + +'zdiff' compares two files and, if they differ, writes to standard output +the differences line by line. A hyphen '-' used as a FILE argument means +standard input. If any file given is compressed, its decompressed content +is used. 'zdiff' is a front end to the program diff and has the limitation +that messages from diff refer to temporary file names instead of those +specified. + + The format for running 'zdiff' is: + + zdiff [OPTIONS] FILE1 [FILE2] + +This compares FILE1 to FILE2. The standard input is used only if FILE1 or +FILE2 refers to standard input. If FILE2 is omitted 'zdiff' tries to +compare FILE1 with the corresponding uncompressed file (if FILE1 is +compressed), and then with the corresponding compressed files of the +remaining formats until one is found. *Note search-order::. + +An exit status of 0 means no differences were found, 1 means some +differences were found, and 2 means trouble. + + 'zdiff' supports the following options (some options only work if the +diff program used supports them): + +'-a' +'--text' + Treat all files as text. + +'-b' +'--ignore-space-change' + Ignore changes in the amount of white space. 
+ +'-B' +'--ignore-blank-lines' + Ignore changes whose lines are all blank. + +'-c' + Use the context output format. + +'-C N' +'--context=N' + Same as -c but use N lines of context. + +'-d' +'--minimal' + Try hard to find a smaller set of changes. + +'-E' +'--ignore-tab-expansion' + Ignore changes due to tab expansion. + +'-i' +'--ignore-case' + Ignore case differences. Consider uppercase and lowercase letters + equivalent. + +'-O [FORMAT1][,FORMAT2]' +'--force-format=[FORMAT1][,FORMAT2]' + Force the compressed formats given. If FORMAT1 or FORMAT2 is omitted, + the corresponding format is automatically detected. Valid values for + FORMAT are 'bz2', 'gz', 'lz', 'xz', 'zst', and 'un' for + 'uncompressed'. If at least one format is specified with this option, + the file is passed to the corresponding decompressor (or transmitted + unmodified) without checking its format, and the exact file names of + both FILE1 and FILE2 must be given. Other names are not tried. + +'-p' +'--show-c-function' + Show which C function each change is in. + +'-q' +'--brief' + Output only whether files differ. + +'-s' +'--report-identical-files' + Report when two files are identical. + +'-t' +'--expand-tabs' + Expand tabs to spaces in output. + +'-T' +'--initial-tab' + Make tabs line up by prepending a tab. + +'-u' + Use the unified output format. + +'-U N' +'--unified=N' + Same as -u but use N lines of context. + +'-v' +'--verbose' + When specified before '--version', print the version of the diff + program used. Further -v's increase the verbosity level. *Note + version::. + +'-w' +'--ignore-all-space' + Ignore all white space. + +'-W COLUMNS' +'--width=COLUMNS' + Output at most the specified number of print columns per line in side + by side format. + +'-y' +'--side-by-side' + Use the side by side output format. 
+ + + +File: zutils.info, Node: Zgrep, Next: Ztest, Prev: Zdiff, Up: Top + +7 Zgrep +******* + +'zgrep' is a front end to the program grep that allows transparent search +on any combination of compressed and uncompressed files. If any file given +is compressed, its decompressed content is used. If a file given does not +exist, and its name does not end with one of the known extensions, 'zgrep' +tries the compressed file names corresponding to the formats supported +until one is found. *Note search-order::. If a file fails to decompress, +'zgrep' continues searching the rest of the files. + + If a file is specified as '-', data are read from standard input, +decompressed if needed, and fed to grep. Data read from standard input must +be of the same type; all uncompressed or all in the same compressed format. + + If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + + For efficiency reasons, 'zgrep' does not always read all its input. For +example, the shell command 'base64 -d foo | zgrep -q X' can cause 'zgrep' +to exit immediately after reading a line containing 'X', without bothering +to read the rest of its input data. This in turn can cause base64 to exit +with a nonzero status because base64 cannot write to its output pipe after +'zgrep' exits. + + The format for running 'zgrep' is: + + zgrep [OPTIONS] PATTERN [FILES] + +An exit status of 0 means at least one match was found, 1 means no matches +were found, and 2 means trouble. + + 'zgrep' supports the following options (Some options only work if the +grep program used supports them. Options -h, -H, -r, -R, and -Z are managed +by 'zgrep' and not passed to grep): + +'-a' +'--text' + Treat all files as text. + +'-A N' +'--after-context=N' + Print N lines of trailing context. + +'-b' +'--byte-offset' + Print the byte offset of each line. + +'-B N' +'--before-context=N' + Print N lines of leading context. 
+ +'-c' +'--count' + Only print a count of matching lines per file. + +'-C N' +'--context=N' + Print N lines of output context. + +'--color[=WHEN]' + Show matched strings in color. WHEN is 'never', 'always', or 'auto'. + +'-e PATTERN' +'--regexp=PATTERN' + Use PATTERN as the pattern to match. + +'-E' +'--extended-regexp' + Interpret PATTERN as an extended regular expression (ERE). + +'-f FILE' +'--file=FILE' + Obtain patterns from FILE, one per line. + When searching in several files at once, command substitution can be + used with '-e' to read FILE only once, for example if FILE is not a + regular file: 'zgrep -e "$(cat FILE)" file1.lz file2.gz' + +'-F' +'--fixed-strings' + Interpret PATTERN as a set of newline-separated strings. + +'-G' +'--basic-regexp' + Interpret PATTERN as a basic regular expression (BRE). This is the + default. + +'-h' +'--no-filename' + Suppress the prefixing of file names on output when multiple files are + searched. + +'-H' +'--with-filename' + Print the file name for each match. + +'-i' +'--ignore-case' + Ignore case distinctions. + +'-I' + Ignore binary files. + +'-l' +'--files-with-matches' + Only print names of files containing at least one match. Stop reading + each file on the first match. + +'-L' +'--files-without-match' + Only print names of files not containing any matches. Stop reading + each file on the first match. + Note: option -L fails (prints wrong results, returns wrong status, and + even hangs) when using GNU grep versions 3.2 to 3.4 inclusive because + of a wrong change in the exit status of grep, which was reverted in + GNU grep 3.5. + +'--label=LABEL' + Display input actually coming from standard input as input coming from + file LABEL. + +'--line-buffered' + Use line buffering on output. This may cause a performance penalty. + +'-m N' +'--max-count=N' + Stop after N matches. + +'-n' +'--line-number' + Prefix each matched line with its line number in the input file. 
+ +'-o' +'--only-matching' + Show only the part of matching lines that actually matches PATTERN. + +'-O FORMAT' +'--force-format=FORMAT' + Force the compressed format given. Valid values for FORMAT are 'bz2', + 'gz', 'lz', 'xz', 'zst', and 'un' for 'uncompressed'. If this option + is used, the files are passed to the corresponding decompressor (or + transmitted unmodified) without checking their format, and the exact + file name must be given. Other names are not tried. + +'-P' +'--perl-regexp' + Interpret PATTERN as a Perl-compatible regular expression (PCRE). + +'-q' +'--quiet' +'--silent' + Suppress all messages. Exit immediately with zero status if any match + is found, even if an error was detected. + +'-r' +'--recursive' + For each directory operand, read and process all files in that + directory, recursively. Follow symbolic links given in the command + line, but skip symbolic links that are encountered recursively. + +'-R' +'--dereference-recursive' + For each directory operand, read and process all files in that + directory, recursively, following all symbolic links. + +'-s' +'--no-messages' + Suppress error messages about nonexistent or unreadable files. + +'-T' +'--initial-tab' + Make sure that the first character of actual line content lies on a tab + stop, so that the alignment of tabs looks normal. + +'-U' +'--binary' + Use binary I/O on platforms affected by the bug known as "text mode + I/O". (MS-DOS, MS-Windows, OS/2). + +'-v' +'--invert-match' + Select non-matching lines. + +'--verbose' + Verbose mode. Show error messages. When specified before '--version', + print the version of the grep program used. Repeating it increases the + verbosity level. *Note version::. + +'-w' +'--word-regexp' + Match only whole words. + +'-x' +'--line-regexp' + Match only whole lines. + +'-Z' +'--null' + Output a zero byte (the ASCII NUL character) instead of the character + that normally follows a file name. 
For example, 'zgrep -lZ' outputs a + zero byte after each file name instead of the usual newline. This + option makes the output unambiguous, even in the presence of file + names containing unusual characters like newlines. + + + +File: zutils.info, Node: Ztest, Next: Zupdate, Prev: Zgrep, Up: Top + +8 Ztest +******* + +'ztest' checks the integrity of the compressed files specified. It also +warns if an uncompressed file has a compressed file name extension, or if a +compressed file has a wrong compressed extension. Uncompressed files are +otherwise ignored. If a file is specified as '-', the integrity of +compressed data read from standard input is checked. Data read from +standard input must be all in the same compressed format. If a file fails to +decompress, does not exist, can't be opened, or is a terminal, 'ztest' +continues testing the rest of the files. A final diagnostic is shown at +verbosity level 1 or higher if any file fails the test when testing multiple +files. + + If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + + Bzip2, gzip, and lzip are the primary formats. Xz and zstd are optional. +If the decompressor for the xz or zstd formats is not found, the +corresponding files are ignored. + + Note that error detection in the xz format is broken. First, some xz +files lack integrity information. Second, not all xz decompressors can +check the integrity of all xz files. Third, section 2.1.1.2 'Stream Flags' +of the xz format specification allows xz decompressors to produce garbage +output without issuing any warning. Therefore, xz files can't always be +checked as reliably as files in the other formats can. 
+ + The format for running 'ztest' is: + + ztest [OPTIONS] [FILES] + +Exit status is 0 if all compressed files check OK, 1 if environmental +problems (file not found, invalid command-line options, I/O errors, etc), 2 +if any compressed file is corrupt or invalid, or if any file has an +incorrect file name extension. + + 'ztest' supports the following options: + +'-O FORMAT' +'--force-format=FORMAT' + Force the compressed format given. Valid values for FORMAT are 'bz2', + 'gz', 'lz', 'xz', and 'zst'. If this option is used, the files are + passed to the corresponding decompressor without checking their + format, and any files in a format that the decompressor can't + understand fail the test. + +'-q' +'--quiet' + Quiet operation. Suppress all messages. + +'-r' +'--recursive' + For each directory operand, read and process all files in that + directory, recursively. Follow symbolic links given in the command + line, but skip symbolic links that are encountered recursively. + +'-R' +'--dereference-recursive' + For each directory operand, read and process all files in that + directory, recursively, following all symbolic links. + +'-v' +'--verbose' + Verbose mode. Show the check status for each file processed. Further + -v's increase the verbosity level. *Note version::. + + + +File: zutils.info, Node: Zupdate, Next: Problems, Prev: Ztest, Up: Top + +9 Zupdate +********* + +'zupdate' recompresses files from bzip2, gzip, xz, and zstd formats to lzip +format. Each original is compared with the new file and then deleted. Only +regular files with standard file name extensions are recompressed, other +files are ignored. Compressed files are decompressed and then recompressed +on the fly; no temporary files are created. If an error happens while +recompressing a file, 'zupdate' exits immediately without recompressing the +rest of the files. The lzip format is chosen as destination because it is +the most appropriate for long-term archiving. 
+ + If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches do nothing. + + If the lzip-compressed version of a file already exists, the file is +skipped unless the option '--force' is given. In this case, if the +comparison with the existing lzip version fails, an error is returned and +the original file is not deleted. The operation of 'zupdate' is meant to be +safe and not cause any data loss. Therefore, existing lzip-compressed files +are never overwritten nor deleted. + + Combining the options '--force' and '--keep', as in +'zupdate -f -k *.gz', checks that there are no differences between each +pair of files in a multiformat set of files. + + The names of the original files must have one of the following +extensions: +'.bz2', '.gz', '.xz', '.zst', or '.Z', which are recompressed to '.lz'; +'.tbz', '.tbz2', '.tgz', '.txz', or '.tzst', which are recompressed to +'.tlz'. +Keeping the combined extensions ('.tgz' --> '.tlz') may be useful when +recompressing Slackware packages, for example. + + Bzip2, gzip, and lzip are the primary formats. Xz and zstd are optional. +If the decompressor for the xz or zstd formats is not found, the +corresponding files are ignored. + + Recompressing a file is much like copying or moving it. Therefore +'zupdate' preserves the access and modification dates, permissions, and, if +you have appropriate privileges, ownership of the file just as 'cp -p' +does. (If the user ID or the group ID can't be duplicated, the file +permission bits S_ISUID and S_ISGID are cleared). + + The format for running 'zupdate' is: + + zupdate [OPTIONS] [FILES] + +Exit status is 0 if all the compressed files were successfully recompressed +(if needed), compared, and deleted (if requested). 1 if a non-fatal error +occurred (file not found or not regular, or has invalid format, or can't be +deleted). 2 if a fatal error occurred (invalid command-line options, +compressor can't be run, or comparison fails). 
+ + 'zupdate' supports the following options: + +'-d DIR' +'--destdir=DIR' + Write recompressed files to another directory, using DIR as base + directory, instead of writing them in the same directory as the + original files. In recursive mode, this is done by replacing each + directory specified in the command line with DIR to produce the + recompressed file names. For example, 'zupdate -r -d DIR ../a' + recompresses a file named '../a/b/c.gz' to 'DIR/b/c.lz'. Regular files + specified in the command line are recompressed directly into DIR. For + example, 'zupdate -d DIR ../a/b/c.gz' writes the recompressed file to + 'DIR/c.lz'. + + This option allows recompressing files from a read-only file system to + another place without the need to copy or link them to the destination + directory first. (Remember to use option '--keep' when recompressing + read-only files to avoid warnings about files that can't be deleted). + +'-e' +'--expand-extensions' + Expand combined file name extensions; recompress '.tbz', '.tbz2', + '.tgz', '.txz', and '.tzst' to '.tar.lz'. + +'-f' +'--force' + Don't skip a file for which a lzip-compressed version already exists. + '--force' compares the content of the input file with the content of + the existing lzip file and deletes the input file if both contents are + identical. + +'-i' +'--ignore-errors' + Ignore non-fatal errors. (See exit status above). + +'-k' +'--keep' + Keep (don't delete) the input file after comparing it with the lzip + file. Use it when recompressing files from a read-only file system. + (See option '--destdir' above). + +'-l' +'--lzip-verbose' + Pass one option '-v' to the lzip compressor so that it shows the + compression ratio for each file processed. Using lzip 1.15 or newer, a + second '-l' shows the progress of compression. Use it together with + '-v' to see the name of the file. + +'-q' +'--quiet' + Quiet operation. Suppress all messages. 
+ +'-r' +'--recursive' + For each directory operand, read and process all files in that + directory, recursively. Follow symbolic links given in the command + line, but skip symbolic links that are encountered recursively. + +'-R' +'--dereference-recursive' + For each directory operand, read and process all files in that + directory, recursively, following all symbolic links. + +'-v' +'--verbose' + Verbose mode. Show the files being processed. A second '-v' also shows + the files being ignored and increases the verbosity level. *Note + version::. + +'-0 .. -9' + Set the compression level of lzip. By default 'zupdate' passes '-9' to + lzip. Custom compression options can be passed to lzip with the option + '--lz'. For example '--lz='lzip -9 -s64MiB''. + +'--lz=COMMAND' + Set compression command. COMMAND may include arguments. For example + '--lz='plzip --threads=2''. The name of the program can't begin with + '-'. This option overrides the value set in 'zutils.conf'. The + compression program used does not need to implement decompression + (*note compressor-requirements::), but it must implement at least the + compression level option '-9' and the option '-o FILE' to write the + compressed output to FILE. tarlz meets these requirements, and + therefore can be used to recompress POSIX tar archives by using a + command like 'zupdate --lz='tarlz -9 -z --no-solid' archive.tar.gz'. + *Note tarlz manual: (tarlz)Top. + + + +File: zutils.info, Node: Problems, Next: Concept index, Prev: Zupdate, Up: Top + +10 Reporting bugs +***************** + +There are probably bugs in zutils. There are certainly errors and omissions +in this manual. If you report them, they will get fixed. If you don't, no +one will ever know about them and they will remain unfixed for all +eternity, if not longer. + + If you find a bug in zutils, please send electronic mail to +<zutils-bug@nongnu.org>. Include the version number, which you can find by +running 'zupdate --version'. 
+ + +File: zutils.info, Node: Concept index, Prev: Problems, Up: Top + +Concept index +************* + + +* Menu: + +* bugs: Problems. (line 6) +* common options: Common options. (line 6) +* getting help: Problems. (line 6) +* introduction: Introduction. (line 6) +* zcat: Zcat. (line 6) +* zcmp: Zcmp. (line 6) +* zdiff: Zdiff. (line 6) +* zgrep: Zgrep. (line 6) +* ztest: Ztest. (line 6) +* zupdate: Zupdate. (line 6) +* zutils.conf: Configuration. (line 6) + + + +Tag Table: +Node: Top217 +Node: Introduction1152 +Ref: search-order2304 +Node: Common options3461 +Ref: version4027 +Ref: compressor-requirements5978 +Node: Configuration7367 +Node: Zcat8400 +Node: Zcmp11139 +Node: Zdiff14407 +Node: Zgrep17490 +Node: Ztest23637 +Node: Zupdate26430 +Ref: lz-compressor31838 +Node: Problems32539 +Node: Concept index33073 + +End Tag Table + + +Local Variables: +coding: iso-8859-15 +End: diff --git a/doc/zutils.texi b/doc/zutils.texi new file mode 100644 index 0000000..1646e7d --- /dev/null +++ b/doc/zutils.texi @@ -0,0 +1,1034 @@ +\input texinfo @c -*-texinfo-*- +@c %**start of header +@setfilename zutils.info +@documentencoding ISO-8859-15 +@settitle Zutils Manual +@finalout +@c %**end of header + +@set UPDATED 23 January 2024 +@set VERSION 1.13 + +@dircategory Compression +@direntry +* Zutils: (zutils). Utilities dealing with compressed files +@end direntry + + +@ifnothtml +@titlepage +@title Zutils +@subtitle Utilities dealing with compressed files +@subtitle for Zutils version @value{VERSION}, @value{UPDATED} +@author by Antonio Diaz Diaz + +@page +@vskip 0pt plus 1filll +@end titlepage + +@contents +@end ifnothtml + +@ifnottex +@node Top +@top + +This manual is for Zutils (version @value{VERSION}, @value{UPDATED}). 
+ +@menu +* Introduction:: Purpose and features of zutils +* Common options:: Options common to all utilities +* Configuration:: The configuration file zutils.conf +* Zcat:: Concatenating compressed files +* Zcmp:: Comparing compressed files byte by byte +* Zdiff:: Comparing compressed files line by line +* Zgrep:: Searching inside compressed files +* Ztest:: Testing the integrity of compressed files +* Zupdate:: Recompressing files to lzip format +* Problems:: Reporting bugs +* Concept index:: Index of concepts +@end menu + +@sp 1 +Copyright @copyright{} 2009-2024 Antonio Diaz Diaz. + +This manual is free documentation: you have unlimited permission to copy, +distribute, and modify it. +@end ifnottex + + +@node Introduction +@chapter Introduction +@cindex introduction + +@uref{http://www.nongnu.org/zutils/zutils.html,,Zutils} +is a collection of utilities able to process any combination of +compressed and uncompressed files transparently. If any file given, +including standard input, is compressed, its decompressed content is used. +Compressed files are decompressed on the fly; no temporary files are +created. Data format is detected by its identifier string (magic bytes), not +by the file name extension. Empty files are considered uncompressed. + +These utilities are not wrapper scripts but safer and more efficient C++ +programs. In particular the option @option{--recursive} is very efficient in +those utilities supporting it. + +@noindent +The utilities provided are @command{zcat}, @command{zcmp}, @command{zdiff}, +@command{zgrep}, @command{ztest}, and @command{zupdate}.@* +The formats supported are bzip2, gzip, +@uref{http://www.nongnu.org/lzip/lzip.html,,lzip}, xz, and zstd.@* +Zutils uses external compressors. The compressor to be used for each format +is configurable at runtime. + +@command{zcat}, @command{zcmp}, @command{zdiff}, and @command{zgrep} are +improved replacements for the shell scripts provided by GNU gzip. +@command{ztest} is unique to zutils. 
@command{zupdate} is similar to gzip's +znew. + +@anchor{search-order} +When @command{zcat}, @command{zcmp}, @command{zdiff}, or @command{zgrep} +need to try compressed file names, the search order is: lzip, gzip, bzip2, +zstd, xz. (@var{file}.[lz|gz|bz2|zst|xz]). + +NOTE: Bzip2 and lzip provide well-defined values of exit status, which makes +them safe to use with zutils. Gzip and xz may return ambiguous warning +values, making them less reliable back ends for zutils. Zstd currently does +not even document its exit status in its man page. +@xref{compressor-requirements}. + +FORMAT NOTE 1: The option @option{--format} allows the processing of a subset +of formats in recursive mode and when trying compressed file names. For +example, use the following command to search for the string @samp{foo} in +gzip and lzip files only: +@w{@samp{zgrep foo -r --format=gz,lz somedir somefile.tar}}. + +FORMAT NOTE 2: The standard POSIX compress format (.Z) is obsolete and is +only supported through gzip. For this to work, the gzip program used (for +example GNU gzip) must be able to decompress .Z files. + +LANGUAGE NOTE: Uncompressed = not compressed = plain data; it may never have +been compressed. Decompressed is used to refer to data which have undergone +the process of decompression. + + +@node Common options +@chapter Common options +@cindex common options + +The following +@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options} +are available in all the utilities. Rather than writing identical +descriptions for each of the programs, they are described here. Remember to +prepend @file{./} to any file name beginning with a hyphen, or use @samp{--}. +@ifnothtml +@xref{Argument syntax,,,arg_parser}. +@end ifnothtml + +@table @code +@item -h +@itemx --help +Print an informative help message describing the options and exit. +@command{zgrep} only supports the @option{--help} form of this option. 
+ +@anchor{version} +@item -V +@itemx --version +Print the version number on the standard output and exit. +This version number should be included in all bug reports. +In verbose mode, @command{zdiff} and @command{zgrep} also print the version +of the diff or grep program used, respectively. At verbosity level 1 (2 for +@command{zdiff} and @command{zgrep}) or higher, also print the versions of +the compressors used (perhaps limited by option @option{--format}). (The +compressors used must support the option @option{-V} for this to work). + +@item -M @var{format_list} +@itemx --format=@var{format_list} +Process only the formats listed in the comma-separated @var{format_list}. +Valid formats are @samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, @samp{zst}, +and @samp{un} for @samp{uncompressed}, meaning "any file name without a +known extension". This option excludes files based on extension, instead of +format, because it is more efficient. The exclusion only applies to names +generated automatically (for example when adding extensions to a file name +or when operating recursively on directories). Files given in the command +line are always processed. + +Each format in @var{format_list} enables file names with the following +extensions: + +@multitable {bz2} {enables} {any other file name} +@item bz2 @tab enables @tab .bz2 .tbz .tbz2 +@item gz @tab enables @tab .gz .tgz .Z +@item lz @tab enables @tab .lz .tlz +@item xz @tab enables @tab .xz .txz +@item zst @tab enables @tab .zst .tzst +@item un @tab enables @tab any other file name +@end multitable + +@item -N +@itemx --no-rcfile +Don't read the runtime configuration file @file{zutils.conf}. + +@item --bz2=@var{command} +@itemx --gz=@var{command} +@itemx --lz=@var{command} +@itemx --xz=@var{command} +@itemx --zst=@var{command} +Set program to be used as decompressor for the corresponding format. +@var{command} may include arguments. For example +@w{@option{--lz='plzip --threads=2'}}. 
@command{zupdate} uses @option{--lz} +for compression, not for decompression (@pxref{lz-compressor}). The name of +the program can't begin with @samp{-}. These options override the values set +in @file{zutils.conf}. The compression program used must meet three +requirements: + +@anchor{compressor-requirements} +@enumerate +@item +When called with the option @option{-d} and without file names, it must read +compressed data from the standard input and produce decompressed data on the +standard output. +@item +If the option @option{-q} is passed to zutils, the compression program must +also accept it. +@item +It must return 0 if no errors occurred, and a non-zero value otherwise. +@end enumerate + +@end table + +Numbers given as arguments to options may be expressed in decimal, +hexadecimal, or octal (using the same syntax as integer constants in C++), +and may be followed by a multiplier and an optional @samp{B} for "byte". + +Table of SI and binary prefixes (unit multipliers): + +@multitable {Prefix} {kilobyte (10^3 = 1000)} {|} {Prefix} {kibibyte (2^10 = 1024)} +@item Prefix @tab Value @tab | @tab Prefix @tab Value +@item k @tab kilobyte (10^3 = 1000) @tab | @tab Ki @tab kibibyte (2^10 = 1024) +@item M @tab megabyte (10^6) @tab | @tab Mi @tab mebibyte (2^20) +@item G @tab gigabyte (10^9) @tab | @tab Gi @tab gibibyte (2^30) +@item T @tab terabyte (10^12) @tab | @tab Ti @tab tebibyte (2^40) +@item P @tab petabyte (10^15) @tab | @tab Pi @tab pebibyte (2^50) +@item E @tab exabyte (10^18) @tab | @tab Ei @tab exbibyte (2^60) +@item Z @tab zettabyte (10^21) @tab | @tab Zi @tab zebibyte (2^70) +@item Y @tab yottabyte (10^24) @tab | @tab Yi @tab yobibyte (2^80) +@item R @tab ronnabyte (10^27) @tab | @tab Ri @tab robibyte (2^90) +@item Q @tab quettabyte (10^30) @tab | @tab Qi @tab quebibyte (2^100) +@end multitable + + +@node Configuration +@chapter The configuration file 'zutils.conf' +@cindex zutils.conf + +@file{zutils.conf} is the runtime configuration file for zutils. 
In it you +may define the compressor name and options to be used for each format. +@file{zutils.conf} is optional; you don't need to install it in order to run +zutils. + +The compressors specified in the command line override those specified +in @file{zutils.conf}. + +You may copy the system @file{zutils.conf} file @file{$@{sysconfdir@}/zutils.conf} +to @file{$XDG_CONFIG_HOME/zutils.conf} and customize these options as you like. +(@env{XDG_CONFIG_HOME} defaults to @file{$HOME/.config}). The file syntax is +fairly obvious (and there are further instructions in it): + +@enumerate +@item +Any line beginning with @samp{#} is a comment line. +@item +Each non-comment line defines the command to be used for the corresponding +format, with the syntax: +@example +<format> = <compressor> [options] +@end example +where <format> is one of @samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, or +@samp{zst}. +@end enumerate + + +@node Zcat +@chapter Zcat +@cindex zcat + +@command{zcat} copies each @var{file} argument to standard output in +sequence. If any file given is compressed, its decompressed content is +copied. If a file given does not exist, and its name does not end with one +of the known extensions, @command{zcat} tries the compressed file names +corresponding to the formats supported until one is found. +@xref{search-order}. If a file fails to decompress, @command{zcat} continues +copying the rest of the files. + +If a file is specified as @samp{-}, data are read from standard input, +decompressed if needed, and sent to standard output. Data read from +standard input must be of the same type; all uncompressed or all in the +same compressed format. + +If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + +The format for running @command{zcat} is: + +@example +zcat [@var{options}] [@var{files}] +@end example + +@noindent +Exit status is 0 if no errors occurred, 1 otherwise. 
+ +@command{zcat} supports the following options: + +@table @code +@item -A +@itemx --show-all +Equivalent to @option{-vET}. + +@item -b +@itemx --number-nonblank +Number all nonblank output lines, starting with 1. The line count is +unlimited. + +@item -e +Equivalent to @option{-vE}. + +@item -E +@itemx --show-ends +Print a @samp{$} after the end of each line. + +@item -n +@itemx --number +Number all output lines, starting with 1. The line count is unlimited. + +@item -O @var{format} +@itemx --force-format=@var{format} +Force the compressed format given. Valid values for @var{format} are +@samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, @samp{zst}, and @samp{un} for +@samp{uncompressed}. If this option is used, the files are passed to the +corresponding decompressor (or transmitted unmodified) without checking +their format, and the exact file name must be given. Other names are not +tried. + +@item -q +@itemx --quiet +Quiet operation. Suppress all messages. + +@item -r +@itemx --recursive +For each directory operand, read and process all files in that directory, +recursively. Follow symbolic links given in the command line, but skip +symbolic links that are encountered recursively. + +@item -R +@itemx --dereference-recursive +For each directory operand, read and process all files in that directory, +recursively, following all symbolic links. + +@item -s +@itemx --squeeze-blank +Replace multiple adjacent blank lines with a single blank line. + +@item -t +Equivalent to @option{-vT}. + +@item -T +@itemx --show-tabs +Print TAB characters as @samp{^I}. + +@item -v +@itemx --show-nonprinting +Print control characters except for LF (newline) and TAB using @samp{^} +notation and precede characters larger than 127 with @samp{M-} (which +stands for "meta"). + +@item --verbose +Verbose mode. Show error messages. Repeating it increases the verbosity +level. @xref{version}. 
+ +@end table + + +@node Zcmp +@chapter Zcmp +@cindex zcmp + +@command{zcmp} compares two files and, if they differ, writes to standard +output the first byte and line number where they differ. Bytes and lines are +numbered starting with 1. A hyphen @samp{-} used as a @var{file} argument +means standard input. If any file given is compressed, its decompressed +content is used. Compressed files are decompressed on the fly; no temporary +files are created. + +The format for running @command{zcmp} is: + +@example +zcmp [@var{options}] @var{file1} [@var{file2}] +@end example + +@noindent +This compares @var{file1} to @var{file2}. The standard input is used only if +@var{file1} or @var{file2} refers to standard input. If @var{file2} is +omitted @command{zcmp} tries to compare @var{file1} with the corresponding +uncompressed file (if @var{file1} is compressed), and then with the +corresponding compressed files of the remaining formats until one is found. +@xref{search-order}. + +@noindent +An exit status of 0 means no differences were found, 1 means some +differences were found, and 2 means trouble. + +@command{zcmp} supports the following options: + +@table @code +@item -b +@itemx --print-bytes +Print the values of the differing bytes (in octal by default) followed by +the bytes themselves in printable form. Print control bytes as a @samp{^} +followed by a letter, and precede bytes larger than 127 with @samp{M-} +(which stands for "meta"). + +@item -H +@itemx --hexadecimal +Print the values of the differing bytes in hexadecimal instead of octal. + +@item -i @var{size} +@itemx --ignore-initial=@var{size} +Ignore any differences in the first @var{size} bytes of the input files. +Treat files with fewer than @var{size} bytes as if they were empty. If +@var{size} is in the form @samp{@var{size1}:@var{size2}}, ignore the +first @var{size1} bytes of the first input file and the first +@var{size2} bytes of the second input file. 
+ +@item -l +@itemx --list +Print the byte numbers (in decimal) and values (in octal by default) of all +differing bytes. Bytes are numbered starting with 1. + +@item -n @var{count} +@itemx --bytes=@var{count} +Compare at most @var{count} input bytes. + +@item -O [@var{format1}][,@var{format2}] +@itemx --force-format=[@var{format1}][,@var{format2}] +Force the compressed formats given. If @var{format1} or @var{format2} is +omitted, the corresponding format is automatically detected. Valid values +for @var{format} are @samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, +@samp{zst}, and @samp{un} for @samp{uncompressed}. If at least one format is +specified with this option, the file is passed to the corresponding +decompressor (or transmitted unmodified) without checking its format, and +the exact file names of both @var{file1} and @var{file2} must be given. +Other names are not tried. + +@item -q +@itemx --quiet +@itemx --silent +Suppress diagnostics written to standard error, even the +@w{@samp{EOF on <name_of_shorter_file>}} diagnostic. Byte differences are +still written to standard output. (@option{-q} produces no output except +byte differences). + +@item -s +@itemx --script +Write nothing to standard output or standard error when files differ, not +even the @w{@samp{EOF on <name_of_shorter_file>}} diagnostic; indicate +differing files through exit status only. Diagnostic messages are still +written to standard error when an error is encountered. (@option{-s} +produces no output except error messages). + +@item -v +@itemx --verbose +Verbose mode. Undoes the effect of @option{--quiet}. Further -v's increase +the verbosity level. @xref{version}. + +@end table + + +@node Zdiff +@chapter Zdiff +@cindex zdiff + +@command{zdiff} compares two files and, if they differ, writes to standard +output the differences line by line. A hyphen @samp{-} used as a @var{file} +argument means standard input. If any file given is compressed, its +decompressed content is used. 
@command{zdiff} is a front end to the program +diff and has the limitation that messages from diff refer to temporary file +names instead of those specified. + +The format for running @command{zdiff} is: + +@example +zdiff [@var{options}] @var{file1} [@var{file2}] +@end example + +@noindent +This compares @var{file1} to @var{file2}. The standard input is used only if +@var{file1} or @var{file2} refers to standard input. If @var{file2} is +omitted @command{zdiff} tries to compare @var{file1} with the corresponding +uncompressed file (if @var{file1} is compressed), and then with the +corresponding compressed files of the remaining formats until one is found. +@xref{search-order}. + +@noindent +An exit status of 0 means no differences were found, 1 means some +differences were found, and 2 means trouble. + +@command{zdiff} supports the following options (some options only work if +the diff program used supports them): + +@table @code +@item -a +@itemx --text +Treat all files as text. + +@item -b +@itemx --ignore-space-change +Ignore changes in the amount of white space. + +@item -B +@itemx --ignore-blank-lines +Ignore changes whose lines are all blank. + +@item -c +Use the context output format. + +@item -C @var{n} +@itemx --context=@var{n} +Same as -c but use @var{n} lines of context. + +@item -d +@itemx --minimal +Try hard to find a smaller set of changes. + +@item -E +@itemx --ignore-tab-expansion +Ignore changes due to tab expansion. + +@item -i +@itemx --ignore-case +Ignore case differences. Consider uppercase and lowercase letters equivalent. + +@item -O [@var{format1}][,@var{format2}] +@itemx --force-format=[@var{format1}][,@var{format2}] +Force the compressed formats given. If @var{format1} or @var{format2} is +omitted, the corresponding format is automatically detected. Valid values +for @var{format} are @samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, +@samp{zst}, and @samp{un} for @samp{uncompressed}. 
If at least one format is +specified with this option, the file is passed to the corresponding +decompressor (or transmitted unmodified) without checking its format, and +the exact file names of both @var{file1} and @var{file2} must be given. +Other names are not tried. + +@item -p +@itemx --show-c-function +Show which C function each change is in. + +@item -q +@itemx --brief +Output only whether files differ. + +@item -s +@itemx --report-identical-files +Report when two files are identical. + +@item -t +@itemx --expand-tabs +Expand tabs to spaces in output. + +@item -T +@itemx --initial-tab +Make tabs line up by prepending a tab. + +@item -u +Use the unified output format. + +@item -U @var{n} +@itemx --unified=@var{n} +Same as -u but use @var{n} lines of context. + +@item -v +@itemx --verbose +When specified before @option{--version}, print the version of the diff +program used. Further -v's increase the verbosity level. @xref{version}. + +@item -w +@itemx --ignore-all-space +Ignore all white space. + +@item -W @var{columns} +@itemx --width=@var{columns} +Output at most the specified number of print columns per line in side by +side format. + +@item -y +@itemx --side-by-side +Use the side by side output format. + +@end table + + +@node Zgrep +@chapter Zgrep +@cindex zgrep + +@command{zgrep} is a front end to the program grep that allows transparent +search on any combination of compressed and uncompressed files. If any file +given is compressed, its decompressed content is used. If a file given does +not exist, and its name does not end with one of the known extensions, +@command{zgrep} tries the compressed file names corresponding to the formats +supported until one is found. @xref{search-order}. If a file fails to +decompress, @command{zgrep} continues searching the rest of the files. + +If a file is specified as @samp{-}, data are read from standard input, +decompressed if needed, and fed to grep. 
Data read from standard input must +be of the same type; all uncompressed or all in the same compressed format. + +If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + +For efficiency reasons, @command{zgrep} does not always read all its input. +For example, the shell command @w{@samp{base64 -d foo | zgrep -q X}} can +cause @command{zgrep} to exit immediately after reading a line containing +@samp{X}, without bothering to read the rest of its input data. This in turn +can cause base64 to exit with a nonzero status because base64 cannot write +to its output pipe after @command{zgrep} exits. + +The format for running @command{zgrep} is: + +@example +zgrep [@var{options}] @var{pattern} [@var{files}] +@end example + +@noindent +An exit status of 0 means at least one match was found, 1 means no +matches were found, and 2 means trouble. + +@command{zgrep} supports the following options (Some options only work if +the grep program used supports them. Options -h, -H, -r, -R, and -Z are +managed by @command{zgrep} and not passed to grep): + +@table @code +@item -a +@itemx --text +Treat all files as text. + +@item -A @var{n} +@itemx --after-context=@var{n} +Print @var{n} lines of trailing context. + +@item -b +@itemx --byte-offset +Print the byte offset of each line. + +@item -B @var{n} +@itemx --before-context=@var{n} +Print @var{n} lines of leading context. + +@item -c +@itemx --count +Only print a count of matching lines per file. + +@item -C @var{n} +@itemx --context=@var{n} +Print @var{n} lines of output context. + +@item --color[=@var{when}] +Show matched strings in color. @var{when} is @samp{never}, @samp{always}, +or @samp{auto}. + +@item -e @var{pattern} +@itemx --regexp=@var{pattern} +Use @var{pattern} as the pattern to match. + +@item -E +@itemx --extended-regexp +Interpret @var{pattern} as an extended regular expression (ERE). 
+ +@item -f @var{file} +@itemx --file=@var{file} +Obtain patterns from @var{file}, one per line.@* +When searching in several files at once, command substitution can be used +with @option{-e} to read @var{file} only once, for example if @var{file} is +not a regular file: +@w{@samp{zgrep -e "$(cat @var{file})" file1.lz file2.gz}} + +@item -F +@itemx --fixed-strings +Interpret @var{pattern} as a set of newline-separated strings. + +@item -G +@itemx --basic-regexp +Interpret @var{pattern} as a basic regular expression (BRE). This is the +default. + +@item -h +@itemx --no-filename +Suppress the prefixing of file names on output when multiple files are +searched. + +@item -H +@itemx --with-filename +Print the file name for each match. + +@item -i +@itemx --ignore-case +Ignore case distinctions. + +@item -I +Ignore binary files. + +@item -l +@itemx --files-with-matches +Only print names of files containing at least one match. Stop reading each +file on the first match. + +@item -L +@itemx --files-without-match +Only print names of files not containing any matches. Stop reading each file +on the first match.@* +Note: option -L fails (prints wrong results, returns wrong status, and even +hangs) when using GNU grep versions 3.2 to 3.4 inclusive because of a wrong +change in the exit status of grep, which was reverted in GNU grep 3.5. + +@item --label=@var{label} +Display input actually coming from standard input as input coming from file +@var{label}. + +@item --line-buffered +Use line buffering on output. This may cause a performance penalty. + +@item -m @var{n} +@itemx --max-count=@var{n} +Stop after @var{n} matches. + +@item -n +@itemx --line-number +Prefix each matched line with its line number in the input file. + +@item -o +@itemx --only-matching +Show only the part of matching lines that actually matches @var{pattern}. + +@item -O @var{format} +@itemx --force-format=@var{format} +Force the compressed format given. 
Valid values for @var{format} are +@samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, @samp{zst}, and @samp{un} for +@samp{uncompressed}. If this option is used, the files are passed to the +corresponding decompressor (or transmitted unmodified) without checking +their format, and the exact file name must be given. Other names are not +tried. + +@item -P +@itemx --perl-regexp +Interpret @var{pattern} as a Perl-compatible regular expression (PCRE). + +@item -q +@itemx --quiet +@itemx --silent +Suppress all messages. Exit immediately with zero status if any match is +found, even if an error was detected. + +@item -r +@itemx --recursive +For each directory operand, read and process all files in that directory, +recursively. Follow symbolic links given in the command line, but skip +symbolic links that are encountered recursively. + +@item -R +@itemx --dereference-recursive +For each directory operand, read and process all files in that directory, +recursively, following all symbolic links. + +@item -s +@itemx --no-messages +Suppress error messages about nonexistent or unreadable files. + +@item -T +@itemx --initial-tab +Make sure that the first character of actual line content lies on a tab +stop, so that the alignment of tabs looks normal. + +@item -U +@itemx --binary +Use binary I/O on platforms affected by the bug known as "text mode I/O". +(MS-DOS, MS-Windows, OS/2). + +@item -v +@itemx --invert-match +Select non-matching lines. + +@item --verbose +Verbose mode. Show error messages. When specified before @option{--version}, +print the version of the grep program used. Repeating it increases the +verbosity level. @xref{version}. + +@item -w +@itemx --word-regexp +Match only whole words. + +@item -x +@itemx --line-regexp +Match only whole lines. + +@item -Z +@itemx --null +Output a zero byte (the ASCII NUL character) instead of the character that +normally follows a file name. 
For example, @w{@samp{zgrep -lZ}} outputs a +zero byte after each file name instead of the usual newline. This option +makes the output unambiguous, even in the presence of file names containing +unusual characters like newlines. + +@end table + + +@node Ztest +@chapter Ztest +@cindex ztest + +@command{ztest} checks the integrity of the compressed files specified. It +also warns if an uncompressed file has a compressed file name extension, or +if a compressed file has a wrong compressed extension. Uncompressed files +are otherwise ignored. If a file is specified as @samp{-}, the integrity of +compressed data read from standard input is checked. Data read from +standard input must be all in the same compressed format. If a file fails to +decompress, does not exist, can't be opened, or is a terminal, @command{ztest} +continues testing the rest of the files. A final diagnostic is shown at +verbosity level 1 or higher if any file fails the test when testing multiple +files. + +If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches read standard input. + +Bzip2, gzip, and lzip are the primary formats. Xz and zstd are optional. If +the decompressor for the xz or zstd formats is not found, the corresponding +files are ignored. + +Note that error detection in the xz format is broken. First, some xz files +lack integrity information. Second, not all xz decompressors can +@uref{http://www.nongnu.org/lzip/xz_inadequate.html#fragmented,,check the integrity} +of all xz files. Third, section 2.1.1.2 'Stream Flags' of the +@uref{http://tukaani.org/xz/xz-file-format.txt,,xz format specification} +allows xz decompressors to produce garbage output without issuing any +warning. Therefore, xz files can't always be checked as reliably as files in +the other formats can. +@c We can only hope that xz is soon abandoned. 
+ +The format for running @command{ztest} is: + +@example +ztest [@var{options}] [@var{files}] +@end example + +@noindent +Exit status is 0 if all compressed files check OK, 1 for environmental +problems (file not found, invalid command-line options, I/O errors, etc), +2 if any compressed file is corrupt or invalid, or if any file has an +incorrect file name extension. + +@command{ztest} supports the following options: + +@table @code +@item -O @var{format} +@itemx --force-format=@var{format} +Force the compressed format given. Valid values for @var{format} are +@samp{bz2}, @samp{gz}, @samp{lz}, @samp{xz}, and @samp{zst}. If this option +is used, the files are passed to the corresponding decompressor without +checking their format, and any files in a format that the decompressor can't +understand fail the test. + +@item -q +@itemx --quiet +Quiet operation. Suppress all messages. + +@item -r +@itemx --recursive +For each directory operand, read and process all files in that directory, +recursively. Follow symbolic links given in the command line, but skip +symbolic links that are encountered recursively. + +@item -R +@itemx --dereference-recursive +For each directory operand, read and process all files in that directory, +recursively, following all symbolic links. + +@item -v +@itemx --verbose +Verbose mode. Show the check status for each file processed. Further -v's +increase the verbosity level. @xref{version}. + +@end table + + +@node Zupdate +@chapter Zupdate +@cindex zupdate + +@command{zupdate} recompresses files from bzip2, gzip, xz, and zstd formats +to lzip format. Each original is compared with the new file and then +deleted. Only regular files with standard file name extensions are +recompressed; other files are ignored. Compressed files are decompressed and +then recompressed on the fly; no temporary files are created. If an error +happens while recompressing a file, @command{zupdate} exits immediately +without recompressing the rest of the files. 
The lzip format is chosen as +destination because it is the most appropriate for long-term archiving. + +If no files are specified, recursive searches examine the current working +directory, and nonrecursive searches do nothing. + +If the lzip-compressed version of a file already exists, the file is skipped +unless the option @option{--force} is given. In this case, if the comparison +with the existing lzip version fails, an error is returned and the original +file is not deleted. The operation of @command{zupdate} is meant to be safe +and not cause any data loss. Therefore, existing lzip-compressed files are +never overwritten nor deleted. + +Combining the options @option{--force} and @option{--keep}, as in +@w{@samp{zupdate -f -k *.gz}}, checks that there are no differences between +each pair of files in a multiformat set of files. + +The names of the original files must have one of the following extensions:@* +@samp{.bz2}, @samp{.gz}, @samp{.xz}, @samp{.zst}, or @samp{.Z}, which are +recompressed to @samp{.lz};@* +@samp{.tbz}, @samp{.tbz2}, @samp{.tgz}, @samp{.txz}, or @samp{.tzst}, which +are recompressed to @samp{.tlz}.@* +Keeping the combined extensions @w{(@samp{.tgz} ---> @samp{.tlz})} may be +useful when recompressing Slackware packages, for example. + +Bzip2, gzip, and lzip are the primary formats. Xz and zstd are optional. If +the decompressor for the xz or zstd formats is not found, the corresponding +files are ignored. + +Recompressing a file is much like copying or moving it. Therefore +@command{zupdate} preserves the access and modification dates, permissions, +and, if you have appropriate privileges, ownership of the file just as +@w{@samp{cp -p}} does. (If the user ID or the group ID can't be duplicated, +the file permission bits S_ISUID and S_ISGID are cleared). 
+ +The format for running @command{zupdate} is: + +@example +zupdate [@var{options}] [@var{files}] +@end example + +@noindent +Exit status is 0 if all the compressed files were successfully recompressed +(if needed), compared, and deleted (if requested). 1 if a non-fatal error +occurred (file not found or not regular, or has invalid format, or can't be +deleted). 2 if a fatal error occurred (invalid command-line options, +compressor can't be run, or comparison fails). + +@command{zupdate} supports the following options: + +@table @code +@item -d @var{dir} +@itemx --destdir=@var{dir} +Write recompressed files to another directory, using @var{dir} as base +directory, instead of writing them in the same directory as the original +files. In recursive mode, this is done by replacing each directory specified +in the command line with @var{dir} to produce the recompressed file names. +For example, @w{@samp{zupdate -r -d @var{dir} ../a}} recompresses a file +named @file{../a/b/c.gz} to @file{@var{dir}/b/c.lz}. Regular files specified +in the command line are recompressed directly into @var{dir}. For example, +@w{@samp{zupdate -d @var{dir} ../a/b/c.gz}} writes the recompressed file to +@file{@var{dir}/c.lz}. + +This option allows recompressing files from a read-only file system to +another place without the need to copy or link them to the destination +directory first. (Remember to use option @option{--keep} when recompressing +read-only files to avoid warnings about files that can't be deleted). + +@item -e +@itemx --expand-extensions +Expand combined file name extensions; recompress @samp{.tbz}, @samp{.tbz2}, +@samp{.tgz}, @samp{.txz}, and @samp{.tzst} to @samp{tar.lz}. + +@item -f +@itemx --force +Don't skip a file for which a lzip-compressed version already exists. +@option{--force} compares the content of the input file with the content of +the existing lzip file and deletes the input file if both contents are +identical. 
+ +@item -i +@itemx --ignore-errors +Ignore non-fatal errors. (See exit status above). + +@item -k +@itemx --keep +Keep (don't delete) the input file after comparing it with the lzip file. +Use it when recompressing files from a read-only file system. (See option +@option{--destdir} above). + +@item -l +@itemx --lzip-verbose +Pass one option @option{-v} to the lzip compressor so that it shows the +compression ratio for each file processed. Using lzip 1.15 or newer, a +second @option{-l} shows the progress of compression. Use it together with +@option{-v} to see the name of the file. + +@item -q +@itemx --quiet +Quiet operation. Suppress all messages. + +@item -r +@itemx --recursive +For each directory operand, read and process all files in that directory, +recursively. Follow symbolic links given in the command line, but skip +symbolic links that are encountered recursively. + +@item -R +@itemx --dereference-recursive +For each directory operand, read and process all files in that directory, +recursively, following all symbolic links. + +@item -v +@itemx --verbose +Verbose mode. Show the files being processed. A second @option{-v} also shows +the files being ignored and increases the verbosity level. @xref{version}. + +@item -0 .. -9 +Set the compression level of lzip. By default @command{zupdate} passes +@option{-9} to lzip. Custom compression options can be passed to lzip with +the option @option{--lz}. For example @w{@option{--lz='lzip -9 -s64MiB'}}. + +@anchor{lz-compressor} +@item --lz=@var{command} +Set compression command. @var{command} may include arguments. For example +@w{@option{--lz='plzip --threads=2'}}. The name of the program can't begin +with @samp{-}. This option overrides the value set in @file{zutils.conf}. 
+The compression program used does not need to implement decompression +(@pxref{compressor-requirements}), but it must implement at least the +compression level option @option{-9} and the option @w{@option{-o @var{file}}} +to write the compressed output to @var{file}. +@uref{http://www.nongnu.org/lzip/manual/tarlz_manual.html,,tarlz} meets +these requirements, and therefore can be used to recompress POSIX tar +archives by using a command like +@w{@samp{zupdate --lz='tarlz -9 -z --no-solid' archive.tar.gz}}. +@ifnothtml +@xref{Top,tarlz manual,,tarlz}. +@end ifnothtml + +@end table + + +@node Problems +@chapter Reporting bugs +@cindex bugs +@cindex getting help + +There are probably bugs in zutils. There are certainly errors and +omissions in this manual. If you report them, they will get fixed. If +you don't, no one will ever know about them and they will remain unfixed +for all eternity, if not longer. + +If you find a bug in zutils, please send electronic mail to +@email{zutils-bug@@nongnu.org}. Include the version number, which you can +find by running @w{@samp{zupdate --version}}. + + +@node Concept index +@unnumbered Concept index + +@printindex cp + +@bye @@ -0,0 +1,457 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <unistd.h> +#include <sys/wait.h> + +#include "arg_parser.h" +#include "rc.h" + + +const char * invocation_name = 0; +const char * program_name = 0; +int verbosity = 0; + +namespace { + +const char * const config_file_name = "zutils.conf"; +const char * const program_year = "2024"; + +std::string compressor_names[num_formats] = + { "bzip2", "gzip", "lzip", "xz", "zstd" }; // default compressor names + +// args to compressors read from .conf or from options like --lz, maybe empty +std::vector< std::string > compressor_args[num_formats]; + +// vector of enabled formats plus [num_formats] for uncompressed. +// empty or incomplete (size <= num_formats) means all enabled. +std::vector< bool > enabled_formats; + +const struct { const char * from; const char * to; int format_index; } + known_extensions[] = { + { ".bz2", "", fmt_bz2 }, + { ".tbz", ".tar", fmt_bz2 }, + { ".tbz2", ".tar", fmt_bz2 }, + { ".gz", "", fmt_gz }, + { ".tgz", ".tar", fmt_gz }, + { ".lz", "", fmt_lz }, + { ".tlz", ".tar", fmt_lz }, + { ".xz", "", fmt_xz }, + { ".txz", ".tar", fmt_xz }, + { ".zst", "", fmt_zst }, + { ".tzst", ".tar", fmt_zst }, + { ".Z", "", fmt_gz }, + { 0, 0, -1 } }; + + +int my_fgetc( FILE * const f ) + { + int ch; + bool comment = false; + + do { + ch = std::fgetc( f ); + if( ch == '#' ) comment = true; + else if( ch == '\n' || ch == EOF ) comment = false; + else if( ch == '\\' && comment ) + { + const int c = std::fgetc( f ); + if( c == '\n' ) { std::ungetc( c, f ); comment = false; } + } + } + while( comment ); + return ch; + } + + +// Return the parity of escapes (backslashes) at the end of a string. 
+bool trailing_escape( const std::string & s ) + { + unsigned len = s.size(); + bool odd_escape = false; + while( len > 0 && s[--len] == '\\' ) odd_escape = !odd_escape; + return odd_escape; + } + + +/* Read a line discarding comments, leading whitespace, and blank lines. + Escaped newlines are discarded. + Return the empty string if at EOF. +*/ +const std::string & my_fgets( FILE * const f, int & linenum ) + { + static std::string s; + bool strip = true; // strip leading whitespace + s.clear(); + + while( true ) + { + int ch = my_fgetc( f ); + if( strip ) + { + strip = false; + while( std::isspace( ch ) ) + { if( ch == '\n' ) { ++linenum; } ch = my_fgetc( f ); } + } + if( ch == EOF ) { if( s.size() ) { ++linenum; } break; } + else if( ch == '\n' ) + { + ++linenum; strip = true; + if( trailing_escape( s ) ) s.erase( s.size() - 1 ); + else if( s.size() ) break; + } + else s += ch; + } + return s; + } + + +bool parse_compressor_command( const std::string & s, int i, + const int format_index ) + { + const int len = s.size(); + while( i < len && std::isspace( s[i] ) ) ++i; // strip spaces + int l = i; + while( i < len && !std::isspace( s[i] ) ) ++i; + if( l >= i || s[l] == '-' ) return false; + compressor_names[format_index].assign( s, l, i - l ); + + compressor_args[format_index].clear(); + while( i < len ) + { + while( i < len && std::isspace( s[i] ) ) ++i; // strip spaces + l = i; + while( i < len && !std::isspace( s[i] ) ) ++i; + if( l < i ) + compressor_args[format_index].push_back( std::string( s, l, i - l ) ); + } + return true; + } + + +bool parse_rc_line( const std::string & line, + const char * const filename, const int linenum ) + { + const int len = line.size(); + int i = 0; + while( i < len && std::isspace( line[i] ) ) ++i; // strip spaces + int l = i; + while( i < len && line[i] != '=' && !std::isspace( line[i] ) ) ++i; + if( l >= i ) + { if( verbosity >= 0 ) + std::fprintf( stderr, "%s %d: missing format name.\n", filename, linenum ); + return false; } + 
const std::string name( line, l, i - l ); + int format_index = -1; + for( int j = 0; j < num_formats; ++j ) + if( name == format_names[j] ) { format_index = j; break; } + if( format_index < 0 ) + { if( verbosity >= 0 ) + std::fprintf( stderr, "%s %d: bad format name '%s'\n", + filename, linenum, name.c_str() ); + return false; } + + while( i < len && std::isspace( line[i] ) ) ++i; // strip spaces + if( i <= 0 || i >= len || line[i] != '=' ) + { if( verbosity >= 0 ) + std::fprintf( stderr, "%s %d: missing '='\n", filename, linenum ); + return false; } + ++i; // skip the '=' + if( !parse_compressor_command( line, i, format_index ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s %d: missing compressor name.\n", filename, linenum ); + return false; + } + return true; + } + + + // Return 0 if success, 1 if file not found, 2 if syntax or I/O error. + // Parsing stops at the first invalid line. +int process_rcfile( const std::string & name ) + { + FILE * const f = std::fopen( name.c_str(), "r" ); + if( !f ) return 1; + + int linenum = 0; + int retval = 0; + + while( true ) + { + const std::string & line = my_fgets( f, linenum ); + if( line.empty() ) break; // EOF + if( !parse_rc_line( line, name.c_str(), linenum ) ) + { retval = 2; break; } + } + if( std::fclose( f ) != 0 && retval == 0 ) + { show_file_error( name.c_str(), "Error closing config file", errno ); + retval = 2; } + return retval; + } + +/* Run 'command' through popen and print the first line of its output + (at most 1023 bytes) preceded by 'Using '. Print nothing if popen + fails or the command produces no output. */ +void show_using_version( const char * const command ) + { + FILE * const f = popen( command, "r" ); + if( f ) + { + char command_version[1024] = { 0 }; + const int rd = std::fread( command_version, 1, sizeof command_version, f ); + pclose( f ); + int i = 0; + while( i + 1 < rd && command_version[i] != '\n' ) ++i; + command_version[i] = 0; + if( command_version[0] ) std::printf( "Using %s\n", command_version ); + } + } + +} // end namespace + +/* Return true if format 'format_index' is enabled. All formats are + enabled while 'enabled_formats' has at most num_formats elements + (parse_format_list resizes it to num_formats + 1). An index outside + [0, num_formats) selects the entry for uncompressed data. */ +bool enabled_format( const int format_index ) + { + if( enabled_formats.size() <= num_formats ) return true; // all enabled + if( format_index < 0 || format_index >= 
num_formats ) + return enabled_formats[num_formats]; // uncompressed + return enabled_formats[format_index]; + } + +/* Enable exactly the formats named in the comma-separated list 'arg' + ('un' stands for uncompressed). If 'arg' is empty or a name in it is + empty or unknown, report it as an invalid argument of option 'pn' + and exit with status 1. + NOTE(review): a single trailing comma is accepted because the loop + ends as soon as 'l' reaches arg.size(). */ +void parse_format_list( const std::string & arg, const char * const pn ) + { + bool error = arg.empty(); + enabled_formats.assign( num_formats + 1, false ); + + for( unsigned l = 0, r; l < arg.size(); l = r + 1 ) + { + r = std::min( arg.find( ',', l ), arg.size() ); + if( l >= r ) { error = true; break; } // empty format + int format_index = num_formats; + const std::string s( arg, l, r - l ); + for( int i = 0; i < num_formats; ++i ) + if( s == format_names[i] ) + { format_index = i; break; } + if( format_index == num_formats && s != "un" ) // uncompressed + { error = true; break; } + enabled_formats[format_index] = true; + } + if( !error ) return; + show_option_error( arg.c_str(), "Invalid format in", pn ); + std::exit( 1 ); + } + +/* Return the index of the format named 'arg', or num_formats if 'arg' + is 'un' and 'allow_uncompressed' is true. Else report the error for + option 'pn' and exit with status 1. */ +int parse_format_type( const std::string & arg, const char * const pn, + const bool allow_uncompressed ) + { + for( int i = 0; i < num_formats; ++i ) + if( arg == format_names[i] ) + return i; + if( allow_uncompressed && arg == "un" ) return num_formats; + show_option_error( arg.c_str(), ( arg.find( ',' ) < arg.size() ) ? + "Too many formats in" : "Invalid format in", pn ); + std::exit( 1 ); + } + +/* Return the index of the first entry in known_extensions whose 'from' + suffix ends 'name' ('name' must be longer than the suffix), or -1 if + there is none. The table ends at the entry with a null 'from'. */ +int extension_index( const std::string & name ) + { + for( int eindex = 0; known_extensions[eindex].from; ++eindex ) + { + const std::string ext( known_extensions[eindex].from ); + if( name.size() > ext.size() && + name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 ) + return eindex; + } + return -1; + } +/* Return the format index of entry 'eindex', or -1 if 'eindex' < 0. */ +int extension_format( const int eindex ) + { return ( eindex >= 0 ) ? known_extensions[eindex].format_index : -1; } +/* Return the 'from' extension of entry 'eindex', or the empty string if 'eindex' < 0. */ +const char * extension_from( const int eindex ) + { return ( eindex >= 0 ) ? 
known_extensions[eindex].from : ""; } +/* Return the 'to' extension of entry 'eindex'. NOTE(review): 'eindex' is not checked here; a negative value indexes before known_extensions. */ +const char * extension_to( const int eindex ) + { return known_extensions[eindex].to; } + +/* Do nothing if option code 'N' is present in 'parser'. Else read + config_file_name from the directory in XDG_CONFIG_HOME, or from + $HOME/.config if XDG_CONFIG_HOME is not set. If that file can't be + opened (or no directory name was obtained) read the one in + SYSCONFDIR. Exit with status 2 if process_rcfile returns 2. */ +void maybe_process_config_file( const Arg_parser & parser ) + { + for( int i = 0; i < parser.arguments(); ++i ) + if( parser.code( i ) == 'N' ) return; + std::string name; + const char * p = std::getenv( "XDG_CONFIG_HOME" ); if( p ) name = p; + else { p = std::getenv( "HOME" ); if( p ) { name = p; name += "/.config"; } } + if( name.size() ) + { + name += '/'; name += config_file_name; + const int retval = process_rcfile( name ); + if( retval == 0 ) return; + if( retval == 2 ) std::exit( 2 ); + } + name = SYSCONFDIR; name += '/'; name += config_file_name; + const int retval = process_rcfile( name ); + if( retval == 2 ) std::exit( 2 ); + } + +/* Set the compressor command of 'format_index' from 'arg'. If 'arg' is + not a valid command, report it for option 'pn' and exit with status + 'eretval'. */ +void parse_compressor( const std::string & arg, const char * const pn, + const int format_index, const int eretval ) + { + if( !parse_compressor_command( arg, 0, format_index ) ) + { show_option_error( arg.c_str(), "Invalid compressor command in", pn ); + std::exit( eretval ); } + } + +/* Return the compressor name set for 'format_index', or 0 if the index + is out of range or the name is empty. */ +const char * get_compressor_name( const int format_index ) + { + if( format_index >= 0 && format_index < num_formats && + compressor_names[format_index].size() ) + return compressor_names[format_index].c_str(); + return 0; // uncompressed/unknown + } + +/* Return the arguments of the compressor of 'format_index', which is + not range-checked here. */ +const std::vector< std::string > & get_compressor_args( const int format_index ) + { + return compressor_args[format_index]; + } + + +void show_help_addr() + { + std::printf( "\nReport bugs to zutils-bug@nongnu.org\n" + "Zutils home page: http://www.nongnu.org/zutils/zutils.html\n" ); + } + +/* Print the program version, copyright, and license. If 'command' is + non-null and verbosity >= 1, also print the version of 'command'. + The versions of the enabled compressors are printed if verbosity >= 1 + (>= 2 when 'command' is non-null). */ +void show_version( const char * const command ) + { + std::printf( "%s (zutils) %s\n", program_name, PROGVERSION ); + std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + if( command && verbosity >= 1 ) show_using_version( command ); + if( verbosity >= 1 + ( command != 0 ) ) + for( int format_index = 0; format_index < num_formats; ++format_index ) + { + 
if( !enabled_format( format_index ) ) continue; + std::string compressor_command( compressor_names[format_index] ); + if( compressor_command.empty() ) continue; + compressor_command += " -V 2> /dev/null"; + show_using_version( compressor_command.c_str() ); + } + std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" ); + } + +/* Do nothing if verbosity < 0. Else print 'msg' (if not null or empty) + to stderr preceded by program_name and followed by the description + of 'errcode' if 'errcode' > 0. If 'help' is true, also print a hint + to try the '--help' option of invocation_name. */ +void show_error( const char * const msg, const int errcode, const bool help ) + { + if( verbosity < 0 ) return; + if( msg && msg[0] ) + std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? std::strerror( errcode ) : "" ); + if( help ) + std::fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); + } + +/* If verbosity >= 0, print program_name, 'filename', and 'msg' to + stderr, followed by the description of 'errcode' if 'errcode' > 0. */ +void show_file_error( const char * const filename, const char * const msg, + const int errcode ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? 
std::strerror( errcode ) : "" ); + } + +/* Print 'msg' as an internal error (if verbosity >= 0) and exit with status 3. */ +void internal_error( const char * const msg ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); + std::exit( 3 ); + } + +/* If verbosity >= 0, report that 'arg' is wrong for option 'option_name'; + 'msg' is the text printed just before the word 'option'. */ +void show_option_error( const char * const arg, const char * const msg, + const char * const option_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: '%s': %s option '%s'.\n", + program_name, arg, msg, option_name ); + } + +/* If verbosity >= 0, report an error closing the output of 'prog_name', with the description of errno. */ +void show_close_error( const char * const prog_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Error closing output of %s: %s\n", + program_name, prog_name, std::strerror( errno ) ); + } + +/* If verbosity >= 0, report that 'prog_name' can't be executed, with the description of errno. */ +void show_exec_error( const char * const prog_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't exec '%s': %s\n", + program_name, prog_name, std::strerror( errno ) ); + } + +/* If verbosity >= 0, report that fork failed for 'prog_name', with the description of errno. */ +void show_fork_error( const char * const prog_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't fork '%s': %s\n", + program_name, prog_name, std::strerror( errno ) ); + } + +/* Wait for child 'pid', retrying if waitpid is interrupted (EINTR). On + any other waitpid error print a diagnostic (if verbosity >= 0) and + call _exit( eretval ). Return the exit status of the child if it + exited normally, else 'eretval'. If 'isgzxz' is true and 'eretval' + is 1, an exit status of 1 is returned as 2. */ +int wait_for_child( const pid_t pid, const char * const name, + const int eretval, const bool isgzxz ) + { + int status; + while( waitpid( pid, &status, 0 ) == -1 ) + { + if( errno != EINTR ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Error waiting termination of '%s': %s\n", + program_name, name, std::strerror( errno ) ); + _exit( eretval ); + } + } + if( WIFEXITED( status ) ) + { + const int tmp = WEXITSTATUS( status ); + if( isgzxz && eretval == 1 && tmp == 1 ) return 2; // for ztest + return tmp; + } + return eretval; + } @@ -0,0 +1,74 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +// format_index; < 0 means undefined, >= num_formats means uncompressed +enum { fmt_bz2, fmt_gz, fmt_lz, fmt_xz, fmt_zst, num_formats }; + +const char * const format_names[num_formats] = + { "bz2", "gz", "lz", "xz", "zst" }; +const char * const simple_extensions[num_formats] = + { ".bz2", ".gz", ".lz", ".xz", ".zst" }; +const int format_order[num_formats] = + { fmt_lz, fmt_gz, fmt_bz2, fmt_zst, fmt_xz }; // search order + +bool enabled_format( const int format_index ); // -1 == uncompressed +void parse_format_list( const std::string & arg, const char * const pn ); +// Return num_formats if arg == "un" (uncompressed). +int parse_format_type( const std::string & arg, const char * const pn, + const bool allow_uncompressed = true ); + +int extension_index( const std::string & name ); // -1 if unknown +int extension_format( const int eindex ); // -1 if uncompressed +const char * extension_from( const int eindex ); // empty string if eindex < 0 +const char * extension_to( const int eindex ); // eindex is not checked + +// Return format_index, or -1 if uncompressed. 
+// +inline int test_extension( const std::string & name ) + { return extension_format( extension_index( name ) ); } + +extern const char * invocation_name; +extern const char * program_name; +extern int verbosity; + +class Arg_parser; + +// Does nothing if option code 'N' is present in 'parser'. +// Exits with status 2 on syntax or I/O error in a config file. +void maybe_process_config_file( const Arg_parser & parser ); + +// Exits with status 'eretval' if 'arg' is not a valid compressor command. +void parse_compressor( const std::string & arg, const char * const pn, + const int format_index, const int eretval = 2 ); + +// Returns 0 if 'format_index' is out of range or has no compressor name. +const char * get_compressor_name( const int format_index ); +const std::vector< std::string > & get_compressor_args( const int format_index ); + +void show_help_addr(); +void show_version( const char * const command = 0 ); +void show_error( const char * const msg, const int errcode = 0, + const bool help = false ); +void show_file_error( const char * const filename, const char * const msg, + const int errcode = 0 ); +void internal_error( const char * const msg ); // exits with status 3 +void show_option_error( const char * const arg, const char * const msg, + const char * const option_name ); +void show_close_error( const char * const prog_name = "data feeder" ); +void show_exec_error( const char * const prog_name ); +void show_fork_error( const char * const prog_name ); + +// Return exit status of child process 'pid', or 'eretval' in case of error. +// If 'isgzxz' and 'eretval' == 1, an exit status of 1 is returned as 2. +// +int wait_for_child( const pid_t pid, const char * const name, + const int eretval = 2, const bool isgzxz = false ); diff --git a/recursive.cc b/recursive.cc new file mode 100644 index 0000000..21c33c9 --- /dev/null +++ b/recursive.cc @@ -0,0 +1,109 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* Return true if full_name is a regular file with an enabled extension + or (a link to) a directory. + The file is examined with stat if 'follow' is true, else with lstat. + A directory is rejected, after printing a warning, if it is the same + (inode and device) as '*stp' or as one of the directories leading to + it in 'full_name', or if one of those can't be stat'ed as a directory. */ +bool test_full_name( const std::string & full_name, const struct stat * stp, + const bool follow ) + { + struct stat st, st2; + if( follow && stat( full_name.c_str(), &st ) != 0 ) return false; + if( !follow && lstat( full_name.c_str(), &st ) != 0 ) return false; + if( S_ISREG( st.st_mode ) ) // regular file + return enabled_format( extension_format( extension_index( full_name ) ) ); + if( !S_ISDIR( st.st_mode ) ) return false; + + std::string prev_dir( full_name ); + bool loop = ( stp && st.st_ino == stp->st_ino && st.st_dev == stp->st_dev ); + if( !loop ) + for( unsigned i = prev_dir.size(); i > 1; ) + { + while( i > 0 && prev_dir[i-1] != '/' ) --i; + if( i == 0 ) break; + if( i > 1 ) --i; // remove trailing slash except at root dir + prev_dir.resize( i ); + if( stat( prev_dir.c_str(), &st2 ) != 0 || !S_ISDIR( st2.st_mode ) || + ( st.st_ino == st2.st_ino && st.st_dev == st2.st_dev ) ) + { loop = true; break; } + } + if( loop ) // full_name already visited or above tree + show_file_error( full_name.c_str(), "warning: recursive directory loop." ); + return !loop; // (link to) directory + } + + +/* Return in input_filename the next file name, or "." for stdin. + ("." was chosen instead of "-" because "." is not a valid file name). + Set 'error' to true if a directory fails to open. 
*/ /* Names are taken from the front of 'filenames'. The name '-' is + skipped if 'ignore_stdin' is true. A directory is skipped unless + 'recursive' is nonzero; then its entries accepted by test_full_name + are inserted at the front of 'filenames' (links found inside + directories are followed only if 'recursive' == 2). + Return false, with input_filename empty, when no names remain. */+bool next_filename( std::list< std::string > & filenames, + std::string & input_filename, bool & error, + const int recursive, const bool ignore_stdin = false, + const bool no_messages = false ) + { + while( !filenames.empty() ) + { + input_filename = filenames.front(); + filenames.pop_front(); + if( input_filename == "-" ) + { + if( ignore_stdin ) continue; + input_filename = "."; return true; + } + struct stat st; + if( stat( input_filename.c_str(), &st ) == 0 && S_ISDIR( st.st_mode ) ) + { + if( recursive ) + { + DIR * const dirp = opendir( input_filename.c_str() ); + if( !dirp ) + { + if( !no_messages ) + show_file_error( input_filename.c_str(), "Can't open directory", errno ); + error = true; continue; + } + for( unsigned i = input_filename.size(); + i > 1 && input_filename[i-1] == '/'; --i ) + input_filename.resize( i - 1 ); // remove trailing slashes + struct stat stdot, *stdotp = 0; + if( input_filename[0] != '/' ) // relative file name + { + if( input_filename == "." ) input_filename.clear(); + if( stat( ".", &stdot ) == 0 && S_ISDIR( stdot.st_mode ) ) + stdotp = &stdot; + } + if( input_filename.size() && input_filename != "/" ) + input_filename += '/'; + std::list< std::string > tmp_list; + while( true ) + { + const struct dirent * const entryp = readdir( dirp ); + if( !entryp ) { closedir( dirp ); break; } + const std::string tmp_name( entryp->d_name ); + if( tmp_name == "." || tmp_name == ".." ) continue; + const std::string full_name( input_filename + tmp_name ); + if( test_full_name( full_name, stdotp, recursive == 2 ) ) + tmp_list.push_back( full_name ); + } + filenames.splice( filenames.begin(), tmp_list ); + } + continue; + } + return true; + } + input_filename.clear(); + return false; + } diff --git a/testsuite/check.sh b/testsuite/check.sh new file mode 100755 index 0000000..730dfa6 --- /dev/null +++ b/testsuite/check.sh @@ -0,0 +1,685 @@ +#! 
/bin/sh +# check script for Zutils - Utilities dealing with compressed files +# Copyright (C) 2009-2024 Antonio Diaz Diaz. +# +# This script is free software: you have unlimited permission +# to copy, distribute, and modify it. + +LC_ALL=C +export LC_ALL +objdir=`pwd` +testdir=`cd "$1" ; pwd` +ZCAT="${objdir}"/zcat +ZCMP="${objdir}"/zcmp +ZDIFF="${objdir}"/zdiff +ZGREP="${objdir}"/zgrep +ZEGREP="${objdir}"/zegrep +ZFGREP="${objdir}"/zfgrep +ZTEST="${objdir}"/ztest +ZUPDATE="${objdir}"/zupdate +compressors="bzip2 gzip lzip" +extensions="bz2 gz lz" +compressor_needed() { echo "${compressors} are needed to run tests" ; exit 1 ; } +framework_failure() { echo "failure in testing framework" ; exit 1 ; } + +if [ ! -f "${ZUPDATE}" ] || [ ! -x "${ZUPDATE}" ] ; then + echo "${ZUPDATE}: cannot execute" + exit 1 +fi + +[ -e "${ZUPDATE}" ] 2> /dev/null || + { + echo "$0: a POSIX shell is required to run the tests" + echo "Try bash -c \"$0 $1 $2\"" + exit 1 + } + +if [ -d tmp ] ; then rm -rf tmp ; fi +mkdir tmp +cd "${objdir}"/tmp || framework_failure + +for i in ${compressors}; do + cat "${testdir}"/test.txt > in || framework_failure + $i in || compressor_needed + printf "Hello World!\n" > hello || framework_failure + $i hello || compressor_needed + touch zero || framework_failure + $i zero || compressor_needed +done + +cat "${testdir}"/test.txt > in || framework_failure +cat "${testdir}"/test.txt.tar > in.tar || framework_failure +printf "01234567890" > pin.tar4 || framework_failure +cat in.tar in.tar in.tar in.tar >> pin.tar4 || framework_failure +cat in > -in- || framework_failure +cat in.lz > -in-.lz || framework_failure +cat in.lz > lz_only.lz || framework_failure +cat in in in in in in > in6 || framework_failure +bad0_lz="${testdir}"/zero_bad_crc.lz +bad0_gz="${testdir}"/zero_bad_crc.gz +bad1_lz="${testdir}"/test_bad_crc.lz +touch empty empty.bz2 empty.gz empty.lz || framework_failure +fail=0 +test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } + 
+printf "testing zcat-%s..." "$2" + +for i in ${extensions}; do + "${ZCAT}" -N in.$i > out || test_failed $LINENO $i + cmp in out || test_failed $LINENO $i + "${ZCAT}" -N empty in.$i > out || test_failed $LINENO $i + cmp in out || test_failed $LINENO $i + "${ZCAT}" -N empty.$i in.$i > out || test_failed $LINENO $i + cmp in out || test_failed $LINENO $i + "${ZCAT}" -N in.$i empty > out || test_failed $LINENO $i + cmp in out || test_failed $LINENO $i + "${ZCAT}" -N in.$i empty.$i > out || test_failed $LINENO $i + cmp in out || test_failed $LINENO $i + "${ZCAT}" -N zero.$i in.$i > out || test_failed $LINENO $i + cmp in out || test_failed $LINENO $i + "${ZCAT}" -N in.$i zero.$i > out || test_failed $LINENO $i + cmp in out || test_failed $LINENO $i + "${ZCAT}" -N --format=un in.$i > out || test_failed $LINENO $i + cmp in out || test_failed $LINENO $i + "${ZCAT}" -N --force-format=$i in.$i > out || test_failed $LINENO $i + cmp in out || test_failed $LINENO $i + "${ZCAT}" -N in.$i | dd bs=1000 count=1 > out 2> /dev/null || + test_failed $LINENO $i + dd if=in bs=1000 count=1 2> /dev/null | cmp - out || + test_failed $LINENO $i +done + +printf "LZIP\001-.............................." | "${ZCAT}" -N > /dev/null 2>&1 +[ $? 
= 1 ] || test_failed $LINENO +printf "LZIPxxxxxx" | "${ZCAT}" -N > /dev/null || test_failed $LINENO +printf "BZh9xxxxxx" | "${ZCAT}" -N > /dev/null || test_failed $LINENO +"${ZCAT}" -N -v -s "${testdir}"/zcat_vs.dat > /dev/null || test_failed $LINENO +"${ZCAT}" -N < in > out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${ZCAT}" -N < in.gz > out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${ZCAT}" -N < in.bz2 > out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${ZCAT}" -N < in.lz > out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${ZCAT}" -N -O lz - - < in.lz > out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${ZCAT}" -N -O un in.lz | lzip -d > out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${ZCAT}" -N --lz='lzip -q' < in.lz > out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${ZCAT}" -N in > out || test_failed $LINENO +cmp in out || test_failed $LINENO +"${ZCAT}" -N lz_only > out || test_failed $LINENO +cmp in out || test_failed $LINENO +cat in.lz in in in in | "${ZCAT}" -N > out || test_failed $LINENO # tdata +cmp in out || test_failed $LINENO +"${ZCAT}" -N in in.gz in.bz2 in.lz -- -in- -in-.lz > out || test_failed $LINENO +cmp in6 out || test_failed $LINENO +"${ZCAT}" -Nq in in.gz in.bz2 in.lz "${bad0_lz}" -- -in- -in-.lz > out +[ $? = 1 ] || test_failed $LINENO +cmp in6 out || test_failed $LINENO +"${ZCAT}" -Nq "${bad1_lz}" -- -in-.lz in in.gz in.bz2 in.lz > out +[ $? = 1 ] || test_failed $LINENO +cmp in6 out || test_failed $LINENO +"${ZCAT}" -N . || test_failed $LINENO +"${ZCAT}" -N -r . > /dev/null || test_failed $LINENO +"${ZCAT}" -N -r > /dev/null || test_failed $LINENO +"${ZCAT}" -N -R . > /dev/null || test_failed $LINENO +"${ZCAT}" -N -R > /dev/null || test_failed $LINENO + +"${ZCAT}" -Nq "" < in.lz > /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=, in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=,lz in.lz +[ $? 
= 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=gz,,lz in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=lz,, in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --format=nc in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --lz='-lzip -q' in.lz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -N --force-format=gz in.bz2 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -N --force-format=bz2 in.lz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -Nq --force-format=lz in.gz +[ $? = 1 ] || test_failed $LINENO +"${ZCAT}" -N --bad-option 2> /dev/null +[ $? = 1 ] || test_failed $LINENO + + +printf "\ntesting zcmp-%s..." "$2" + +for i in ${extensions}; do + "${ZCMP}" -N in.$i || test_failed $LINENO $i + "${ZCMP}" -N in in.$i || test_failed $LINENO $i + "${ZCMP}" -N in in.$i --force-format=,$i || test_failed $LINENO $i + "${ZCMP}" -N in.$i in || test_failed $LINENO $i + "${ZCMP}" -N in.$i in --force-format=$i || test_failed $LINENO $i + "${ZCMP}" -N -i 1kB:1000 -n 500 in6 in.$i || test_failed $LINENO $i + "${ZCMP}" -N -i 1KiB:1024 -n 50 in.$i in6 || test_failed $LINENO $i + "${ZCMP}" -N empty empty.$i || test_failed $LINENO $i + "${ZCMP}" -N empty zero.$i || test_failed $LINENO $i +done + +"${ZCMP}" -N -q in in6 +[ $? = 1 ] || test_failed $LINENO +"${ZCMP}" -N -n 0 in in6 || test_failed $LINENO +"${ZCMP}" -N -n 100B in in6 || test_failed $LINENO +"${ZCMP}" -N -n 1k in in6 || test_failed $LINENO +"${ZCMP}" -N -n 10kB in in6 || test_failed $LINENO +"${ZCMP}" -N -n 01750 in in6 || test_failed $LINENO +"${ZCMP}" -N -n 0x3E8 in in6 || test_failed $LINENO +"${ZCMP}" -N -s in.tar pin.tar4 +[ $? = 1 ] || test_failed $LINENO +"${ZCMP}" -N -q -i 0B:11B in.tar pin.tar4 +[ $? 
= 1 ] || test_failed $LINENO +"${ZCMP}" -N -i 0:11 -n 0 in.tar pin.tar4 || test_failed $LINENO +"${ZCMP}" -N -i 0:11 -n 100 in.tar pin.tar4 || test_failed $LINENO +"${ZCMP}" -N -i 0:013 -n 1Ki in.tar pin.tar4 || test_failed $LINENO +"${ZCMP}" -N -i 0:0xB -n 10KiB in.tar pin.tar4 || test_failed $LINENO +"${ZCMP}" -N - - || test_failed $LINENO +"${ZCMP}" -N -q - +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N in in || test_failed $LINENO +"${ZCMP}" -N in || test_failed $LINENO +"${ZCMP}" -N --format=gz,bz2 in || test_failed $LINENO +"${ZCMP}" -N --format=gz in || test_failed $LINENO +"${ZCMP}" -N in.lz in.gz || test_failed $LINENO +cat in.lz | "${ZCMP}" -N -O un,un in.lz - || test_failed $LINENO +"${ZCMP}" -N --lz='lzip -q' in.lz in.gz || test_failed $LINENO +"${ZCMP}" -N in.gz -- -in-.lz || test_failed $LINENO +"${ZCMP}" -N -- -in-.lz in.gz || test_failed $LINENO +"${ZCMP}" -N in -- -in-.lz || test_failed $LINENO +"${ZCMP}" -N -- -in- in.lz || test_failed $LINENO +"${ZCMP}" -N in.lz -- -in- || test_failed $LINENO +"${ZCMP}" -N -- -in-.lz in || test_failed $LINENO +"${ZCMP}" -N -- -in- in || test_failed $LINENO +"${ZCMP}" -N in -- -in- || test_failed $LINENO +"${ZCMP}" -N in.lz - < in || test_failed $LINENO +"${ZCMP}" -N - in.lz < in || test_failed $LINENO +"${ZCMP}" -N in - < in.lz || test_failed $LINENO +"${ZCMP}" -N - in < in.lz || test_failed $LINENO +"${ZCMP}" -N lz_only.lz - < in || test_failed $LINENO +"${ZCMP}" -N -q lz_only.lz +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N -q "" in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N -q --force-format=lz in.lz +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N -q --force-format=lz in.gz in.lz +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N -q -i 100BB in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N -q -i 100BB:100 in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N -q -i 100: in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N -q -n -1 in in +[ $? 
= 2 ] || test_failed $LINENO +"${ZCMP}" -N -q -n 100BB in in +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N -q --gz=bad-gzip in.gz in.lz +[ $? = 2 ] || test_failed $LINENO +"${ZCMP}" -N --bad-option in in 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +cat in.gz > a.gz || framework_failure +cat in.lz > a.lz || framework_failure +"${ZCMP}" -N a.gz || test_failed $LINENO +"${ZCMP}" -N a.lz || test_failed $LINENO + + +printf "\ntesting zdiff-%s..." "$2" + +"${ZDIFF}" -N a.gz || test_failed $LINENO +"${ZDIFF}" -N a.lz || test_failed $LINENO +rm -f a.gz a.lz || framework_failure + +for i in ${extensions}; do + "${ZDIFF}" -N in.$i > /dev/null || test_failed $LINENO $i + "${ZDIFF}" -N in in.$i > /dev/null || test_failed $LINENO $i + "${ZDIFF}" -N in in.$i --force-format=,$i > /dev/null || + test_failed $LINENO $i + "${ZDIFF}" -N in.$i in > /dev/null || test_failed $LINENO $i + "${ZDIFF}" -N in.$i in --force-format=$i, > /dev/null || + test_failed $LINENO $i + "${ZDIFF}" -N empty empty.$i > /dev/null || test_failed $LINENO $i + "${ZDIFF}" -N empty zero.$i > /dev/null || test_failed $LINENO $i +done + +"${ZDIFF}" -N in in6 > /dev/null +[ $? = 1 ] || test_failed $LINENO +# GNU diff 3.0 returns 2 (instead of 1) when binary files differ +"${ZDIFF}" -N in.tar pin.tar4 > /dev/null && test_failed $LINENO +"${ZDIFF}" -N - - || test_failed $LINENO +"${ZDIFF}" -N - 2> /dev/null +[ $? 
= 2 ] || test_failed $LINENO +"${ZDIFF}" -N in in || test_failed $LINENO +"${ZDIFF}" -N in || test_failed $LINENO +"${ZDIFF}" -N --format=gz,bz2 in || test_failed $LINENO +"${ZDIFF}" -N --format=gz in || test_failed $LINENO +"${ZDIFF}" -N in.lz in.gz > /dev/null || test_failed $LINENO +cat in.gz | "${ZDIFF}" -N -O un,un - in.gz || test_failed $LINENO +"${ZDIFF}" -N --lz='lzip -q' in.lz in.gz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in.gz -- -in-.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N -- -in-.lz in.gz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in -- -in-.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N -- -in- in.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in.lz -- -in- > /dev/null || test_failed $LINENO +"${ZDIFF}" -N -- -in-.lz in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N -- -in- in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in -- -in- > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in.lz - < in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N - in.lz < in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N in - < in.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N - in < in.lz > /dev/null || test_failed $LINENO +"${ZDIFF}" -N lz_only.lz - < in > /dev/null || test_failed $LINENO +"${ZDIFF}" -N lz_only.lz 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N "" in 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --bz2='-bzip2' in.bz2 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --brief --force-format=bz2 in.bz2 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --brief --force-format=,lz in.lz in.bz2 > /dev/null 2>&1 +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --brief --gz=bad-gzip in.gz in.lz > /dev/null 2>&1 +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --bad-option 2> /dev/null +[ $? 
= 2 ] || test_failed $LINENO + +mkdir tmp2 +cat in > tmp2/a || framework_failure +cat in.lz > tmp2/a.lz || framework_failure +"${ZDIFF}" -N --format=bz2 tmp2/a 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --format=gz tmp2/a 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --format=lz tmp2/a.lz 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZDIFF}" -N --format=lz tmp2/a || test_failed $LINENO +"${ZDIFF}" -N --format=un tmp2/a.lz || test_failed $LINENO +rm -r tmp2 || framework_failure + + +printf "\ntesting zgrep-%s..." "$2" + +for i in ${extensions}; do + "${ZGREP}" -N "GNU" in.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N "GNU" in.$i hello.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N "GNU" hello.$i in.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N -q "GNU" in.$i hello.$i || test_failed $LINENO $i + "${ZGREP}" -N -q "GNU" hello.$i in.$i || test_failed $LINENO $i + "${ZGREP}" -N "GNU" < in.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N -l "GNU" in.$i > /dev/null || test_failed $LINENO $i + "${ZGREP}" -N -L "GNU" in.$i || test_failed $LINENO $i + "${ZGREP}" -N --force-format=$i "GNU" in.$i > /dev/null || + test_failed $LINENO $i + "${ZGREP}" -N -v "nx_pattern" in.$i > /dev/null || + test_failed $LINENO $i + "${ZGREP}" -N "nx_pattern" in.$i && test_failed $LINENO $i + "${ZGREP}" -N -l "nx_pattern" in.$i && test_failed $LINENO $i + "${ZGREP}" -N -L "nx_pattern" in.$i > /dev/null && + test_failed $LINENO $i + "${ZGREP}" -N --force-format=$i "GNU" in 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + "${ZGREP}" -N "nx_pattern" empty.$i && test_failed $LINENO $i + "${ZGREP}" -N "nx_pattern" zero.$i && test_failed $LINENO $i +done + +"${ZGREP}" -N "nx_pattern" empty && test_failed $LINENO +"${ZGREP}" -N pin.tar4 -e "GNU" > /dev/null || test_failed $LINENO +"${ZGREP}" -N "GNU" < pin.tar4 > /dev/null || test_failed $LINENO +"${ZGREP}" -N -r "GNU" . 
> /dev/null || test_failed $LINENO +"${ZGREP}" -N -r "GNU" > /dev/null || test_failed $LINENO +"${ZGREP}" -N -R "GNU" . > /dev/null || test_failed $LINENO +"${ZGREP}" -N -R "GNU" > /dev/null || test_failed $LINENO +"${ZGREP}" -N "nx_pattern" -r . in > /dev/null && test_failed $LINENO +"${ZGREP}" -N -e "GNU" in > /dev/null || test_failed $LINENO +"${ZGREP}" -N "GNU" < in > /dev/null || test_failed $LINENO +"${ZGREP}" -N -O lz "nx_pattern" - - < in.lz > /dev/null && test_failed $LINENO +"${ZGREP}" -N -O un "LZIP" in.lz > /dev/null || test_failed $LINENO +"${ZGREP}" -N -e "-free" --lz='lzip -q' < in.lz > /dev/null || + test_failed $LINENO +"${ZGREP}" -N -- "-free" -in- > /dev/null || test_failed $LINENO +"${ZGREP}" -N -q -- "-free" nx_file -in-.lz || test_failed $LINENO +"${ZGREP}" -N "GNU" in in.gz in.bz2 in.lz -- -in- > /dev/null || + test_failed $LINENO +"${ZGREP}" -N -l "GNU" in in.gz in.bz2 in.lz -- -in- > /dev/null || + test_failed $LINENO +"${ZGREP}" -N -l -Z "GNU" in in.gz in.bz2 in.lz -- -in- > /dev/null || + test_failed $LINENO +"${ZGREP}" -N -L "GNU" in in.gz in.bz2 in.lz -- -in- || test_failed $LINENO +"${ZGREP}" -N -l "nx_pattern" in in.gz in.bz2 in.lz -- -in- && + test_failed $LINENO +"${ZGREP}" -N -L "nx_pattern" in in.gz in.bz2 in.lz -- -in- > /dev/null && + test_failed $LINENO +"${ZGREP}" -Nq -l "01234567890" in "${bad1_lz}" in.lz && test_failed $LINENO +"${ZGREP}" -Nq -l "01234567890" in "${bad1_lz}" in.lz pin.tar4 > /dev/null || + test_failed $LINENO + +"${ZGREP}" -N "GNU" . +[ $? = 1 ] || test_failed $LINENO +"${ZGREP}" -N "GNU" "" < in.lz 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZGREP}" -N --bad-option 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZGREP}" -N "GNU" -s nx_file +[ $? = 2 ] || test_failed $LINENO +"${ZGREP}" -N -q +[ $? = 2 ] || test_failed $LINENO + +"${ZEGREP}" -N "GNU" in > /dev/null || test_failed $LINENO +"${ZFGREP}" -N "GNU" in > /dev/null || test_failed $LINENO + + +printf "\ntesting ztest-%s..." 
"$2" + +for i in ${extensions}; do + "${ZTEST}" -N --force-format=$i < in.$i || test_failed $LINENO $i + "${ZTEST}" -N --force-format=$i < in 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + "${ZTEST}" -N --force-format=$i in 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + "${ZTEST}" -N empty.$i 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i +done + +rm -f empty.bz2 empty.gz empty.lz || framework_failure +"${ZTEST}" -N in in.gz in.bz2 in.lz -- -in- || test_failed $LINENO +"${ZTEST}" -N < in.gz || test_failed $LINENO +"${ZTEST}" -N < in.bz2 || test_failed $LINENO +"${ZTEST}" -N < in.lz || test_failed $LINENO +"${ZTEST}" -N - < in.lz || test_failed $LINENO +"${ZTEST}" -N - in.gz - < in.lz || test_failed $LINENO +"${ZTEST}" -N --lz='lzip -q' < in.lz || test_failed $LINENO +"${ZTEST}" -N -r . || test_failed $LINENO +"${ZTEST}" -N -r || test_failed $LINENO +"${ZTEST}" -N -R . || test_failed $LINENO +"${ZTEST}" -N -R || test_failed $LINENO +"${ZTEST}" -N empty || test_failed $LINENO + +# test wrong compressed extensions +cat in.bz2 > in_bz2.gz || framework_failure +cat in.gz > in_gz.lz || framework_failure +cat in.lz > in_lz.bz2 || framework_failure +cat in > in_un.lz || framework_failure +"${ZTEST}" -Nq in_bz2.gz +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq in_gz.lz +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq in_lz.bz2 +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq in_un.lz +[ $? = 2 ] || test_failed $LINENO +rm -f in_bz2.gz in_gz.lz in_lz.bz2 in_un.lz || framework_failure + +"${ZTEST}" -Nq in.gz "${bad0_lz}" in.bz2 "${bad1_lz}" in.lz +[ $? = 2 ] || test_failed $LINENO +lines=`"${ZTEST}" -N in.gz "${bad0_lz}" in.bz2 "${bad1_lz}" in.lz 2>&1 | wc -l` +[ "${lines}" -eq 2 ] || test_failed $LINENO "${lines}" +lines=`"${ZTEST}" -Nv in.gz "${bad0_lz}" in.bz2 "${bad1_lz}" in.lz 2>&1 | wc -l` +[ "${lines}" -eq 6 ] || test_failed $LINENO "${lines}" +"${ZTEST}" -Nq < in +[ $? 
= 2 ] || test_failed $LINENO +"${ZTEST}" -Nq --force-format=un < in.gz +[ $? = 1 ] || test_failed $LINENO +"${ZTEST}" -Nq "" < in.lz +[ $? = 1 ] || test_failed $LINENO +dd if=in.lz bs=1000 count=1 2> /dev/null | "${ZTEST}" -N -q +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq --force-format=lz in.bz2 +[ $? = 2 ] || test_failed $LINENO +"${ZTEST}" -Nq --force-format=un in.gz +[ $? = 1 ] || test_failed $LINENO +"${ZTEST}" -N --lz='lzip --bad-option' in.lz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZTEST}" -N --bad-option 2> /dev/null +[ $? = 1 ] || test_failed $LINENO + + +printf "\ntesting zupdate-%s..." "$2" + +"${ZUPDATE}" -N "" || test_failed $LINENO +cat in.bz2 > a.bz2 || framework_failure +cat in.gz > a.gz || framework_failure +"${ZUPDATE}" -Nq --bz2=bad_command a.bz2 +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -N --bz2='bzip2 --bad-option' a.bz2 > /dev/null 2>&1 +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -Nq --gz=bad_command a.gz +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -N --gz='gzip --bad-option' a.gz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${ZUPDATE}" -Nq --lz=bad_command a.gz +[ $? = 2 ] || test_failed $LINENO +"${ZUPDATE}" -N --lz='lzip --bad-option' a.gz 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +"${ZUPDATE}" -N --bad-option 2> /dev/null +[ $? = 2 ] || test_failed $LINENO + +if /bin/sh -c "tarlz -V" > /dev/null 2>&1; then + printf . + gzip < in.tar > in.tar.gz || framework_failure + "${ZUPDATE}" -N -k --lz='tarlz -0 -z --no-solid' in.tar.gz || + test_failed $LINENO + [ -e in.tar ] || test_failed $LINENO + "${ZCMP}" -N in.tar.gz in.tar.lz || test_failed $LINENO + rm -f in.tar.gz in.tar.lz || framework_failure +fi + +cat in.lz in.lz > a.lz || framework_failure +"${ZUPDATE}" -N -q -f a.bz2 a.gz +[ $? 
= 2 ] || test_failed $LINENO +[ -e a.bz2 ] || test_failed $LINENO +[ -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +"${ZUPDATE}" -N -0 a.bz2 || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure +"${ZUPDATE}" -N -0 a.gz || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ ! -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +cat in.gz > a.gz || framework_failure +"${ZUPDATE}" -N -q -0 a.bz2 a.gz +[ $? = 1 ] || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +cat in.gz > a.gz || framework_failure +"${ZUPDATE}" -N -0 -f -k a.bz2 a.gz || test_failed $LINENO +[ -e a.bz2 ] || test_failed $LINENO +[ -e a.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +cat in.gz > a.gz || framework_failure +"${ZUPDATE}" -N -0 -f a.bz2 a.gz || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ ! -e a.gz ] || test_failed $LINENO +[ ! -e a ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.bz2 > a.tbz || framework_failure # keep combined extensions +cat in.bz2 > b.tbz2 || framework_failure +cat in.gz > c.tgz || framework_failure +"${ZUPDATE}" -N -0 a.tbz b.tbz2 c.tgz || test_failed $LINENO +[ ! -e a.tbz ] || test_failed $LINENO +[ ! -e b.tbz2 ] || test_failed $LINENO +[ ! -e c.tgz ] || test_failed $LINENO +[ ! -e a ] || test_failed $LINENO +[ ! -e b ] || test_failed $LINENO +[ ! -e c ] || test_failed $LINENO +[ ! -e a.lz ] || test_failed $LINENO +[ ! -e b.lz ] || test_failed $LINENO +[ ! 
-e c.lz ] || test_failed $LINENO +[ -e a.tlz ] || test_failed $LINENO +[ -e b.tlz ] || test_failed $LINENO +[ -e c.tlz ] || test_failed $LINENO +rm -f a.tlz b.tlz c.tlz || framework_failure + +cat in.bz2 > a.tbz || framework_failure # expand combined extensions +cat in.bz2 > b.tbz2 || framework_failure +cat in.gz > c.tgz || framework_failure +"${ZUPDATE}" -N -0 -e a.tbz b.tbz2 c.tgz || test_failed $LINENO +[ ! -e a.tbz ] || test_failed $LINENO +[ ! -e b.tbz2 ] || test_failed $LINENO +[ ! -e c.tgz ] || test_failed $LINENO +[ ! -e a ] || test_failed $LINENO +[ ! -e b ] || test_failed $LINENO +[ ! -e c ] || test_failed $LINENO +[ -e a.tar.lz ] || test_failed $LINENO +[ -e b.tar.lz ] || test_failed $LINENO +[ -e c.tar.lz ] || test_failed $LINENO +[ ! -e a.tlz ] || test_failed $LINENO +[ ! -e b.tlz ] || test_failed $LINENO +[ ! -e c.tlz ] || test_failed $LINENO +rm -f a.tar.lz b.tar.lz c.tar.lz || framework_failure + +# test decompression error +cat in.bz2 > a.bz2 || framework_failure +cat "${bad0_gz}" > b.gz || framework_failure +cat in.gz > c.gz || framework_failure +"${ZUPDATE}" -N -0 -f a.bz2 b.gz c.gz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ -e b.gz ] || test_failed $LINENO +[ -e c.gz ] || test_failed $LINENO +[ ! -e a ] || test_failed $LINENO +[ ! -e b ] || test_failed $LINENO +[ ! -e c ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +# ignore error +cat in.bz2 > a.bz2 || framework_failure +cat "${bad0_gz}" > b.gz || framework_failure +cat in.gz > c.gz || framework_failure +"${ZUPDATE}" -N -0 -f -i a.bz2 b.gz c.gz 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +[ ! -e a.bz2 ] || test_failed $LINENO +[ -e b.gz ] || test_failed $LINENO +[ ! -e c.gz ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz b.gz c.gz || framework_failure + +cat in.bz2 > a.bz2 || framework_failure +"${ZUPDATE}" -N -0 -q a.bz2 || test_failed $LINENO +[ ! 
-e a.bz2 ] || test_failed $LINENO +[ -e a.lz ] || test_failed $LINENO +rm -f a.lz || framework_failure + +cat in.gz > 'name with spaces.gz' || framework_failure +"${ZUPDATE}" -N -0 -q 'name with spaces.gz' || test_failed $LINENO +[ ! -e 'name with spaces.gz' ] || test_failed $LINENO +[ -e 'name with spaces.lz' ] || test_failed $LINENO +"${ZCMP}" -N in 'name with spaces.lz' || test_failed $LINENO +rm -f 'name with spaces.lz' || framework_failure + +cat zero.gz > z.gz || framework_failure +"${ZUPDATE}" -N -0 -q z.gz || test_failed $LINENO +[ ! -e z.gz ] || test_failed $LINENO +[ -e z.lz ] || test_failed $LINENO +"${ZCMP}" -N empty z.lz || test_failed $LINENO +rm -f empty z.lz || framework_failure + +mkdir tmp2 +mkdir tmp2/tmp3 +cat in.bz2 > tmp2/tmp3/a.bz2 || framework_failure +cat in.gz > tmp2/tmp3/a.gz || framework_failure +# test recursive to destdir +"${ZUPDATE}" -N -0 -k -r --format=gz --destdir=ddir1 tmp2 || test_failed $LINENO +[ -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e ddir1/tmp3/a.lz ] || test_failed $LINENO +"${ZUPDATE}" -N -0 -k -r --format=bz2 --destdir="${objdir}"/tmp/ddir2 tmp2 || + test_failed $LINENO +[ -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e ddir2/tmp3/a.lz ] || test_failed $LINENO +# test non-recursive to destdir +"${ZUPDATE}" -N -0 -k --destdir=ddir3/// tmp2/tmp3/a.gz || test_failed $LINENO +[ -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e ddir3/a.lz ] || test_failed $LINENO +"${ZUPDATE}" -N -0 -k --destdir=ddir4/tmp2/tmp3 tmp2/tmp3/a.gz || + test_failed $LINENO +[ -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e ddir4/tmp2/tmp3/a.lz ] || test_failed $LINENO +rm -rf ddir1 ddir2 ddir3 ddir4 || framework_failure +# test recursive in place +"${ZUPDATE}" -N -0 -r --format=gz tmp2 || test_failed $LINENO +[ -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO 
+[ ! -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e tmp2/tmp3/a.lz ] || test_failed $LINENO +rm -f tmp2/tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -0 -r --format=bz2 tmp2 || test_failed $LINENO +[ ! -e tmp2/tmp3/a.bz2 ] || test_failed $LINENO +[ ! -e tmp2/tmp3/a.gz ] || test_failed $LINENO +[ -e tmp2/tmp3/a.lz ] || test_failed $LINENO +rm -f tmp2/tmp3/a.lz || framework_failure +cat in.bz2 > tmp2/tmp3/a.bz2 || framework_failure +cat in.gz > tmp2/tmp3/a.gz || framework_failure +cd tmp2 || framework_failure +"${ZUPDATE}" -N -0 -r -k -f . || test_failed $LINENO +[ -e tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +rm -f tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -0 -r -k -f || test_failed $LINENO +[ -e tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +rm -f tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -0 -R -k -f . || test_failed $LINENO +[ -e tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +rm -f tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -0 -R -k -f || test_failed $LINENO +[ -e tmp3/a.bz2 ] || test_failed $LINENO +[ -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +rm -f tmp3/a.lz || framework_failure +"${ZUPDATE}" -N -0 -r -f . || test_failed $LINENO +[ ! -e tmp3/a.bz2 ] || test_failed $LINENO +[ ! -e tmp3/a.gz ] || test_failed $LINENO +[ -e tmp3/a.lz ] || test_failed $LINENO +cd .. || framework_failure +rm -r tmp2 || framework_failure + +if ln -s '.' slink 2> /dev/null ; then + "${ZCAT}" -N -r slink > /dev/null || test_failed $LINENO + "${ZGREP}" -N -r "GNU" slink > /dev/null || test_failed $LINENO + "${ZTEST}" -N -r slink || test_failed $LINENO + "${ZUPDATE}" -N -r -f slink || test_failed $LINENO +else + printf "\nwarning: skipping link test: 'ln' does not work on your system." 
+fi +rm -f slink || framework_failure + +echo +if [ ${fail} = 0 ] ; then + echo "tests completed successfully." + cd "${objdir}" && rm -r tmp +else + echo "tests failed." +fi +exit ${fail} diff --git a/testsuite/test.txt b/testsuite/test.txt new file mode 100644 index 0000000..9196a3a --- /dev/null +++ b/testsuite/test.txt @@ -0,0 +1,676 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. 
+ + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) 
Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) <year> <name of author> + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. 
If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. + GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) <year> <name of author>
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/testsuite/test.txt.tar b/testsuite/test.txt.tar Binary files differnew file mode 100644 index 0000000..92d6f1b --- /dev/null +++ b/testsuite/test.txt.tar diff --git a/testsuite/test_bad_crc.lz b/testsuite/test_bad_crc.lz Binary files differnew file mode 100644 index 0000000..c7d5bc9 --- /dev/null +++ b/testsuite/test_bad_crc.lz diff --git a/testsuite/zcat_vs.dat b/testsuite/zcat_vs.dat new file mode 100644 index 0000000..42333e8 --- /dev/null +++ b/testsuite/zcat_vs.dat @@ -0,0 +1,68 @@ +Worst case test file for zcat -vs. +First 4096 input bytes produce 4095 output bytes because of -s. +Next 4096 input bytes produce 16384 output bytes, accumulating a total +of 20479 bytes (5 * 4096 - 1) in the output buffer€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€
€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€
€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€€
\ No newline at end of file diff --git a/testsuite/zero_bad_crc.gz b/testsuite/zero_bad_crc.gz Binary files differ new file mode 100644 index 0000000..a2a9991 --- /dev/null +++ b/testsuite/zero_bad_crc.gz diff --git a/testsuite/zero_bad_crc.lz b/testsuite/zero_bad_crc.lz Binary files differ new file mode 100644 index 0000000..0d3cc93 --- /dev/null +++ b/testsuite/zero_bad_crc.lz @@ -0,0 +1,392 @@ +/* Zcat - decompress and concatenate files to standard output + Copyright (C) 2010-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined __MSVCRT__ || defined __OS2__ +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + + +namespace { + +#include "recursive.cc" +#include "zcatgrep.cc" + +struct Cat_options + { + int number_lines; // 0 = no, 1 = nonblank, 2 = all + bool show_ends; + bool show_nonprinting; + bool show_tabs; + bool squeeze_blank; + + Cat_options() + : number_lines( 0 ), show_ends( false ), show_nonprinting( false ), + show_tabs( false ), squeeze_blank( false ) {} + }; + + +class Line_number // unlimited size line counter + { + std::string str; + unsigned first_digit_pos; + +public: + Line_number() : str( " 0\t" ), first_digit_pos( 5 ) {} + + void next() + { + for( unsigned i = str.size() - 1; i > first_digit_pos; ) + { + if( str[--i] < '9' ) { ++str[i]; return; } + str[i] = '0'; + } + if( first_digit_pos > 0 ) str[--first_digit_pos] = '1'; + else str.insert( str.begin() + first_digit_pos, '1' ); + } + + int sprint( uint8_t * const buf ) + { + std::memcpy( buf, str.c_str(), str.size() ); + return str.size(); + } + }; + +Line_number line_number; + + +void show_help() + { + std::printf( "zcat copies each file argument to standard output in sequence. If any\n" + "file given is compressed, its decompressed content is copied. If a file\n" + "given does not exist, and its name does not end with one of the known\n" + "extensions, zcat tries the compressed file names corresponding to the\n" + "formats supported until one is found. 
If a file fails to decompress, zcat\n" + "continues copying the rest of the files.\n" + "\nIf a file is specified as '-', data are read from standard input,\n" + "decompressed if needed, and sent to standard output. Data read from\n" + "standard input must be of the same type; all uncompressed or all in the\n" + "same compressed format.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches read standard input.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" + "\nUsage: zcat [options] [files]\n" + "\nExit status is 0 if no errors occurred, 1 otherwise.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -A, --show-all equivalent to '-vET'\n" + " -b, --number-nonblank number nonblank output lines\n" + " -e equivalent to '-vE'\n" + " -E, --show-ends display '$' at end of each line\n" + " -M, --format=<list> process only the formats in <list>\n" + " -n, --number number all output lines\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=<fmt> force the input format\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -s, --squeeze-blank never more than one single blank line\n" + " -t equivalent to '-vT'\n" + " -T, --show-tabs display TAB characters as '^I'\n" + " -v, --show-nonprinting use '^' and 'M-' notation, except for LF and TAB\n" + " --verbose verbose mode (show error messages)\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for options '-M' and '-O' 
are 'bz2', 'gz', 'lz', 'xz', 'zst',\n" + "and 'un' for uncompressed.\n" ); + show_help_addr(); + } + + +bool do_cat( const int infd, const int buffer_size, + uint8_t * const inbuf, uint8_t * const outbuf, + const std::string & input_filename, + const Cat_options & cat_options ) + { + static int at_bol = 1; // at begin of line. 0 = false, 1 = true, + // 2 = at begin of second blank line. + int inpos = 0; // positions in buffers + int outpos = 0; + int rd = -1; // bytes read by the last readblock + unsigned char c; + + while( true ) + { + do { + if( outpos >= buffer_size ) + { + if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos ) + { show_error( "Write error", errno ); return false; } + outpos = 0; + } + if( inpos > rd ) // inbuf is empty + { + rd = readblock( infd, inbuf, buffer_size ); + if( rd != buffer_size && errno ) + { + show_file_error( input_filename.c_str(), "Read error", errno ); + return false; + } + if( rd == 0 ) + { + if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos ) + { show_error( "Write error", errno ); return false; } + outpos = 0; + return true; + } + inpos = 0; + inbuf[rd] = '\n'; // sentinel newline + } + else // a real newline was found + { + if( at_bol > 1 ) + { + if( cat_options.squeeze_blank ) { c = inbuf[inpos++]; continue; } + } + else ++at_bol; + if( at_bol > 1 && cat_options.number_lines == 2 ) + { + line_number.next(); + outpos += line_number.sprint( &outbuf[outpos] ); + } + if( cat_options.show_ends ) outbuf[outpos++] = '$'; + outbuf[outpos++] = '\n'; // output the newline + } + c = inbuf[inpos++]; + } + while( c == '\n' ); + + if( at_bol > 0 && cat_options.number_lines ) + { + line_number.next(); + outpos += line_number.sprint( &outbuf[outpos] ); + } + at_bol = 0; + + // the loops below continue until a newline (real or sentinel) is found + + if( cat_options.show_nonprinting ) + while( true ) + { + if( c < 32 || c >= 127 ) + { + if( c == '\n' ) break; + if( c != '\t' || cat_options.show_tabs ) + { + if( c >= 128 ) + { 
c -= 128; outbuf[outpos++] = 'M'; outbuf[outpos++] = '-'; } + if( c < 32 ) { c += 64; outbuf[outpos++] = '^'; } + else if( c == 127 ) { c = '?'; outbuf[outpos++] = '^'; } + } + } + outbuf[outpos++] = c; + c = inbuf[inpos++]; + } + else // not quoting + while( c != '\n' ) + { + if( c == '\t' && cat_options.show_tabs ) + { c += 64; outbuf[outpos++] = '^'; } + outbuf[outpos++] = c; + c = inbuf[inpos++]; + } + } + } + + +bool cat( int infd, const int format_index, const std::string & input_filename, + const Cat_options & cat_options ) + { + enum { buffer_size = 4096, outbuf_size = (5 * buffer_size) + 256 + 1 }; + // input buffer with space for sentinel newline at the end + uint8_t * const inbuf = new uint8_t[buffer_size+1]; + /* output buffer with space for character quoting, 255-digit line number, + worst case flushing respect to inbuf, and a canary byte. */ + uint8_t * const outbuf = new uint8_t[outbuf_size]; + outbuf[outbuf_size-1] = 0; // canary byte; quoting does not print 0 + Children children; + bool error = false; + + if( !set_data_feeder( input_filename, &infd, children, format_index ) || + !do_cat( infd, buffer_size, inbuf, outbuf, input_filename, cat_options ) ) + error = true; + if( !good_status( children, !error ) ) error = true; + if( !error && close( infd ) != 0 ) { show_close_error(); error = true; } + if( outbuf[outbuf_size-1] != 0 ) internal_error( "buffer overflow." ); + delete[] outbuf; delete[] inbuf; + return !error; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { verbose_opt = 256, bz2_opt, gz_opt, lz_opt, xz_opt, zst_opt }; + int format_index = -1; // undefined + int recursive = 0; // 1 = '-r', 2 = '-R' + std::list< std::string > filenames; + Cat_options cat_options; + program_name = "zcat"; + invocation_name = ( argc > 0 ) ? 
argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'A', "show-all", Arg_parser::no }, // cat + { 'b', "number-nonblank", Arg_parser::no }, // cat + { 'c', "stdout", Arg_parser::no }, // gzip + { 'd', "decompress", Arg_parser::no }, // gzip + { 'e', 0, Arg_parser::no }, // cat + { 'E', "show-ends", Arg_parser::no }, // cat + { 'f', "force", Arg_parser::no }, // gzip + { 'h', "help", Arg_parser::no }, + { 'l', "list", Arg_parser::no }, // gzip + { 'L', "license", Arg_parser::no }, // gzip + { 'M', "format", Arg_parser::yes }, + { 'n', "number", Arg_parser::no }, // cat + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 's', "squeeze-blank", Arg_parser::no }, // cat + { 't', 0, Arg_parser::no }, // cat + { 'T', "show-tabs", Arg_parser::no }, // cat + { 'v', "show-nonprinting", Arg_parser::no }, // cat + { 'V', "version", Arg_parser::no }, + { verbose_opt, "verbose", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'A': cat_options.show_ends = true; + cat_options.show_nonprinting = true; + cat_options.show_tabs = true; break; + case 'b': cat_options.number_lines = 1; break; + case 'c': break; + case 'd': 
break; + case 'e': cat_options.show_nonprinting = true; // fall through + case 'E': cat_options.show_ends = true; break; + case 'f': break; + case 'h': show_help(); return 0; + case 'l': break; + case 'L': break; + case 'M': parse_format_list( arg, pn ); break; + case 'n': if( cat_options.number_lines == 0 ) + { cat_options.number_lines = 2; } break; + case 'N': break; + case 'O': format_index = parse_format_type( arg, pn ); break; + case 'q': verbosity = -1; break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 's': cat_options.squeeze_blank = true; break; + case 't': cat_options.show_nonprinting = true; // fall through + case 'T': cat_options.show_tabs = true; break; + case 'v': cat_options.show_nonprinting = true; break; + case 'V': show_version(); return 0; + case verbose_opt: if( verbosity < 4 ) ++verbosity; break; + case bz2_opt: parse_compressor( arg, pn, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, pn, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, pn, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, pn, fmt_xz, 1 ); break; + case zst_opt: parse_compressor( arg, pn, fmt_zst, 1 ); break; + default: internal_error( "uncaught option." ); + } + } // end process options + +#if defined __MSVCRT__ || defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" ); + + std::string input_filename; + bool error = false; + bool stdin_used = false; + while( next_filename( filenames, input_filename, error, recursive ) ) + { + int infd; + if( input_filename == "." 
) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; input_filename = "-"; + } + else + { + infd = open_instream( input_filename, format_index < 0 ); + if( infd < 0 ) { error = true; continue; } + } + + if( !cat( infd, format_index, input_filename, cat_options ) ) error = true; + + if( close( infd ) != 0 ) + { show_file_error( input_filename.c_str(), "Error closing input file", + errno ); error = true; } + } + + if( std::fclose( stdout ) != 0 ) + { + show_error( "Error closing stdout", errno ); + error = true; + } + return error; + } diff --git a/zcatgrep.cc b/zcatgrep.cc new file mode 100644 index 0000000..9bbb359 --- /dev/null +++ b/zcatgrep.cc @@ -0,0 +1,59 @@ +/* Common code for zcat and zgrep + Copyright (C) 2010-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +int simple_extension_index( const std::string & name ) + { + for( int i = 0; i < num_formats; ++i ) + { + const std::string ext( simple_extensions[i] ); + if( name.size() > ext.size() && + name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 ) + return i; + } + return -1; + } + + +int open_instream( std::string & input_filename, const bool search, + const bool no_messages = false ) + { + int infd = open( input_filename.c_str(), O_RDONLY | O_BINARY ); + if( infd < 0 ) + { + const int saved_errno = errno; + if( search && simple_extension_index( input_filename ) < 0 ) + { + for( int i = 0; i < num_formats; ++i ) + if( enabled_format( format_order[i] ) ) + { + const std::string name( input_filename + + simple_extensions[format_order[i]] ); + infd = open( name.c_str(), O_RDONLY | O_BINARY ); + if( infd >= 0 ) { input_filename = name; break; } + } + } + if( infd < 0 && !no_messages ) + show_file_error( input_filename.c_str(), "Can't open input file", + saved_errno ); + } + return infd; + } @@ -0,0 +1,534 @@ +/* Zcmp - decompress and compare two files byte by byte + Copyright (C) 2010-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cctype> +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined __MSVCRT__ || defined __OS2__ +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + +#ifndef LLONG_MAX +#define LLONG_MAX 0x7FFFFFFFFFFFFFFFLL +#endif + + +namespace { + +#include "zcmpdiff.cc" + +void show_help() + { + std::printf( "zcmp compares two files and, if they differ, writes to standard output the\n" + "first byte and line number where they differ. Bytes and lines are numbered\n" + "starting with 1. A hyphen '-' used as a file argument means standard input.\n" + "If any file given is compressed, its decompressed content is used. Compressed\n" + "files are decompressed on the fly; no temporary files are created.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" + "\nUsage: zcmp [options] file1 [file2]\n" + "\nzcmp compares file1 to file2. The standard input is used only if file1 or\n" + "file2 refers to standard input. 
If file2 is omitted zcmp tries to compare\n" + "file1 with the corresponding uncompressed file (if file1 is compressed), and\n" + "then with the corresponding compressed files of the remaining formats until\n" + "one is found.\n" + "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -b, --print-bytes print differing bytes\n" + " -H, --hexadecimal print hexadecimal values instead of octal\n" + " -i, --ignore-initial=<n>[:<n2>] ignore differences in the first <n> bytes\n" + " -l, --list list position, value of all differing bytes\n" + " -M, --format=<list> process only the formats in <list>\n" + " -n, --bytes=<n> compare at most <n> bytes\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=[<f1>][,<f2>] force one or both input formats\n" + " -q, --quiet, --silent suppress diagnostics written to stderr\n" + " -s, --script suppress messages about file differences\n" + " -v, --verbose verbose mode (opposite of --quiet)\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', 'zst',\n" + "and 'un' for uncompressed.\n" + "\nByte counts given as arguments to options may be expressed in decimal,\n" + "hexadecimal, or octal (using the same syntax as integer constants in C++),\n" + "and may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc.\n" ); + show_help_addr(); + } + + +// separate numbers of 5 or more digits in groups of 3 digits using '_' +const char * format_num3( long 
long num ) + { + enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 }; + const char * const si_prefix = "kMGTPEZYRQ"; + const char * const binary_prefix = "KMGTPEZYRQ"; + static char buffer[buffers][bufsize]; // circle of static buffers for printf + static int current = 0; + + char * const buf = buffer[current++]; current %= buffers; + char * p = buf + bufsize - 1; // fill the buffer backwards + *p = 0; // terminator + const bool negative = num < 0; + if( num > 1024 || num < -1024 ) + { + char prefix = 0; // try binary first, then si + for( int i = 0; i < n && num != 0 && num % 1024 == 0; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( int i = 0; i < n && num != 0 && num % 1000 == 0; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + } + const bool split = num >= 10000 || num <= -10000; + + for( int i = 0; ; ) + { + const long long onum = num; num /= 10; + *(--p) = llabs( onum - ( 10 * num ) ) + '0'; if( num == 0 ) break; + if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } + } + if( negative ) *(--p) = '-'; + return p; + } + + +// Recognized formats: <num>k[B], <num>Ki[B], <num>[MGTPEZYRQ][i][B] +long long getnum( const char * const arg, const char * const option_name, + const char ** const tailp = 0, + const long long llimit = 0, + const long long ulimit = LLONG_MAX ) + { + char * tail; + errno = 0; + long long result = strtoll( arg, &tail, 0 ); + if( tail == arg ) + { show_option_error( arg, "Bad or missing numerical argument in", + option_name ); std::exit( 2 ); } + if( result < 0 ) errno = ERANGE; + + if( !errno && tail[0] && std::isalpha( tail[0] ) ) + { + const unsigned char ch = *tail++; + int factor; + bool bsuf; // 'B' suffix is present + if( tail[0] == 'i' ) { ++tail; factor = 1024; } else factor = 1000; + if( tail[0] == 'B' ) { ++tail; bsuf = true; } else bsuf = false; + int exponent = -1; // -1 = bad multiplier + switch( ch ) + { + case 'Q': exponent = 10; break; + case 'R': 
exponent = 9; break; + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; + case 'B': if( factor == 1000 && !bsuf ) exponent = 0; break; + } + if( exponent < 0 ) + { show_option_error( arg, "Bad multiplier in numerical argument of", + option_name ); std::exit( 2 ); } + for( int i = 0; i < exponent; ++i ) + { + if( ulimit / factor >= result ) result *= factor; + else { errno = ERANGE; break; } + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in " + "option '%s'.\n", program_name, arg, format_num3( llimit ), + format_num3( ulimit ), option_name ); + std::exit( 2 ); + } + if( tailp ) *tailp = tail; + return result; + } + + +void parse_ignore_initial( const char * const arg, const char * const pn, + long long ignore_initial[2] ) + { + const char * tail; + ignore_initial[0] = getnum( arg, pn, &tail ); + if( *tail == ':' || *tail == ',' ) + ignore_initial[1] = getnum( ++tail, pn ); + else if( *tail == 0 ) ignore_initial[1] = ignore_initial[0]; + else { show_option_error( arg, "Missing colon in", pn ); std::exit( 2 ); } + } + + +bool skip_ignore_initial( const long long ignore_initial, const int infd ) + { + if( ignore_initial > 0 ) + { + const int buffer_size = 4096; + long long rest = ignore_initial; + uint8_t buffer[buffer_size]; + while( rest > 0 ) + { + const int size = std::min( rest, (long long)buffer_size ); + const int rd = readblock( infd, buffer, size ); + if( rd != size && errno ) return false; + if( rd < size ) break; // EOF + rest -= rd; + } + } + return true; + } + + +/* Put into buf the unsigned char c, making unprintable bytes visible by + 
quoting like cat -t does. */ +void sprintc( char * const buf, unsigned char c ) + { + int i = 0; + + if( c < 32 || c >= 127 ) + { + if( c >= 128 ) { c -= 128; buf[i++] = 'M'; buf[i++] = '-'; } + if( c < 32 ) { c += 64; buf[i++] = '^'; } + else if( c == 127 ) { c = '?'; buf[i++] = '^'; } + } + buf[i++] = c; + buf[i++] = 0; + } + + +int block_compare( const uint8_t * const buffer0, + const uint8_t * const buffer1, + unsigned long long * const line_numberp ) + { + const uint8_t * p0 = buffer0; + const uint8_t * p1 = buffer1; + + if( line_numberp ) + { + int nl_count = 0; + while( *p0 == *p1 ) + { if( *p0 == '\n' ) { ++nl_count; } ++p0; ++p1; } + *line_numberp += nl_count; + } + else while( *p0 == *p1 ) { ++p0; ++p1; } + return p0 - buffer0; + } + + +int cmp( const long long max_size, const int infd[2], + const std::string filenames[2], bool finished[2], + const bool hexadecimal, const bool list, const bool print_bytes, + const bool scripted ) + { + const int buffer_size = 4096; + unsigned long long byte_number = 1; + unsigned long long line_number = 1; + // remaining number of bytes to compare + long long rest = ( max_size >= 0 ) ? 
max_size : buffer_size; + // buffers with space for sentinels at the end + uint8_t * const buffer0 = new uint8_t[2*(buffer_size+1)]; + uint8_t * const buffer1 = buffer0 + buffer_size + 1; + uint8_t * buffer[2]; + buffer[0] = buffer0; buffer[1] = buffer1; + int retval = 0; + bool empty[2] = { true, true }; + + while( rest > 0 ) + { + const int size = std::min( (long long)buffer_size, rest ); + if( max_size >= 0 ) rest -= size; + int rd[2]; // number of bytes read from each file + for( int i = 0; i < 2; ++i ) + { + rd[i] = readblock( infd[i], buffer[i], size ); + if( rd[i] != size && errno ) + { show_file_error( filenames[i].c_str(), "Read error", errno ); + retval = 2; goto done; } + if( rd[i] > 0 ) empty[i] = false; + } + for( int i = 0; i < 2; ++i ) + if( rd[i] < size ) finished[i] = true; + + const int min_rd = std::min( rd[0], rd[1] ); + buffer0[min_rd] = 0; // sentinels for the block compare + buffer1[min_rd] = 1; + + int first_diff = block_compare( buffer0, buffer1, list ? 0 : &line_number ); + byte_number += first_diff; + + if( first_diff < min_rd ) + { + retval = 1; // difference found + if( scripted ) break; // status only + if( !list ) // show first difference + { + if( !print_bytes ) + std::printf( "%s %s differ: byte %llu, line %llu\n", + filenames[0].c_str(), filenames[1].c_str(), + byte_number, line_number ); + else + { + const unsigned char c0 = buffer0[first_diff]; + const unsigned char c1 = buffer1[first_diff]; + char buf0[5], buf1[5]; + sprintc( buf0, c0 ); sprintc( buf1, c1 ); + std::printf( hexadecimal ? 
+ "%s %s differ: byte %llu, line %llu is %02X %s %02X %s\n" : + "%s %s differ: byte %llu, line %llu is %3o %s %3o %s\n", + filenames[0].c_str(), filenames[1].c_str(), + byte_number, line_number, c0, buf0, c1, buf1 ); + } + std::fflush( stdout ); + break; + } + else // list ; show all differences + { + for( ; first_diff < min_rd; ++byte_number, ++first_diff ) + { + const unsigned char c0 = buffer0[first_diff]; + const unsigned char c1 = buffer1[first_diff]; + if( c0 != c1 ) + { + if( !print_bytes ) + std::printf( hexadecimal ? "%llu %02X %02X\n" : "%llu %3o %3o\n", + byte_number, c0, c1 ); + else + { + char buf0[5], buf1[5]; + sprintc( buf0, c0 ); sprintc( buf1, c1 ); + std::printf( hexadecimal ? "%llu %02X %-4s %02X %s\n" : + "%llu %3o %-4s %3o %s\n", + byte_number, c0, buf0, c1, buf1 ); + } + } + } + std::fflush( stdout ); + } + } + + if( rd[0] != rd[1] ) + { + const int i = rd[1] < rd[0]; + if( verbosity >= 0 ) + std::fprintf( stderr, empty[i] ? + "%s: EOF on %s which is empty\n" : list ? + "%s: EOF on %s after byte %llu\n" : + "%s: EOF on %s after byte %llu, in line %llu\n", + program_name, filenames[i].c_str(), + byte_number - 1, line_number ); + retval = 1; break; + } + if( min_rd != buffer_size ) break; + } +done: + delete[] buffer0; + return retval; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; + // number of initial bytes ignored for each file + long long ignore_initial[2] = { 0, 0 }; + long long max_size = -1; // < 0 means unlimited size + int format_types[2] = { -1, -1 }; // < 0 means undefined + bool hexadecimal = false; + bool list = false; // list position, value of all differing bytes + bool print_bytes = false; // print differing bytes + bool scripted = false; // suppress messages about file differences + program_name = "zcmp"; + invocation_name = ( argc > 0 ) ? 
argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'b', "print-bytes", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'H', "hexadecimal", Arg_parser::no }, + { 'i', "ignore-initial", Arg_parser::yes }, + { 'l', "list", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'n', "bytes", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'q', "silent", Arg_parser::no }, + { 's', "script", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & sarg = parser.argument( argind ); + const char * const arg = sarg.c_str(); + switch( code ) + { + case 'b': print_bytes = true; break; + case 'h': show_help(); return 0; + case 'H': hexadecimal = true; break; + case 'i': parse_ignore_initial( arg, pn, ignore_initial ); break; + case 'l': list = true; break; + case 'M': parse_format_list( sarg, pn ); break; + case 'n': max_size = getnum( arg, pn ); break; + case 'N': break; + case 'O': parse_format_types2( sarg, pn, format_types ); break; + case 'q': verbosity = -1; break; + case 's': scripted = true; break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + case bz2_opt: parse_compressor( sarg, pn, fmt_bz2 ); break; + case 
gz_opt: parse_compressor( sarg, pn, fmt_gz ); break; + case lz_opt: parse_compressor( sarg, pn, fmt_lz ); break; + case xz_opt: parse_compressor( sarg, pn, fmt_xz ); break; + case zst_opt: parse_compressor( sarg, pn, fmt_zst ); break; + default: internal_error( "uncaught option." ); + } + } // end process options + +#if defined __MSVCRT__ || defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + const int files = parser.arguments() - argind; + if( files < 1 ) { show_error( "No files given.", 0, true ); return 2; } + if( files > 2 ) { show_error( "Too many files.", 0, true ); return 2; } + + std::string filenames[2]; // file names of the two input files + filenames[0] = parser.argument( argind ); + if( files == 2 ) filenames[1] = parser.argument( argind + 1 ); + + int infd[2]; // file descriptors of the two files + infd[0] = ( filenames[0] == "-" ) ? + STDIN_FILENO : open_instream( filenames[0] ); + if( infd[0] < 0 ) return 2; + + if( files == 2 ) + { + if( check_identical( filenames[0].c_str(), filenames[1].c_str() ) ) + { + if( ignore_initial[0] == ignore_initial[1] ) return 0; + else { show_error( "Can't compare parts of same file." ); return 2; } + } + infd[1] = ( filenames[1] == "-" ) ? 
+ STDIN_FILENO : open_instream( filenames[1] ); + if( infd[1] < 0 ) return 2; + } + else + { + if( filenames[0] == "-" ) + { show_error( "Missing operand after '-'.", 0, true ); return 2; } + if( format_types[0] >= 0 || format_types[1] >= 0 ) + { show_error( "Two files must be given when format is specified.", 0, true ); + return 2; } + filenames[1] = filenames[0]; + infd[1] = open_other_instream( filenames[1] ); + if( infd[1] < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't find file to compare with '%s'.\n", + program_name, filenames[0].c_str() ); + show_error( 0, 0, true ); return 2; + } + } + + int old_infd[2]; // copy of file descriptors of the two files + old_infd[0] = infd[0]; old_infd[1] = infd[1]; + Children children[2]; + if( !set_data_feeder( filenames[0], &infd[0], children[0], format_types[0] ) || + !set_data_feeder( filenames[1], &infd[1], children[1], format_types[1] ) ) + return 2; + + for( int i = 0; i < 2; ++i ) + if( !skip_ignore_initial( ignore_initial[i], infd[i] ) ) + { + show_file_error( filenames[i].c_str(), + "Read error skipping initial bytes", errno ); + return 2; + } + + bool finished[2] = { false, false }; + int retval = cmp( max_size, infd, filenames, finished, hexadecimal, list, + print_bytes, scripted ); + + for( int i = 0; i < 2; ++i ) + if( !good_status( children[i], finished[i] ) ) retval = 2; + + for( int i = 0; i < 2; ++i ) + { + if( close( infd[i] ) != 0 ) + { show_close_error(); retval = 2; } + if( filenames[i] != "-" && close( old_infd[i] ) != 0 ) + { + show_file_error( filenames[i].c_str(), "Error closing input file", errno ); + retval = 2; + } + } + if( std::fclose( stdout ) != 0 ) + { + show_error( "Error closing stdout", errno ); + retval = 2; + } + + return retval; + } diff --git a/zcmpdiff.cc b/zcmpdiff.cc new file mode 100644 index 0000000..16e3980 --- /dev/null +++ b/zcmpdiff.cc @@ -0,0 +1,78 @@ +/* Common code for zcmp and zdiff + Copyright (C) 2010-2024 Antonio Diaz Diaz. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +int open_instream( const std::string & input_filename ) + { + const int infd = open( input_filename.c_str(), O_RDONLY | O_BINARY ); + if( infd < 0 ) + show_file_error( input_filename.c_str(), "Can't open input file", errno ); + return infd; + } + + +int open_other_instream( std::string & name ) + { + const int eindex = extension_index( name ); // search extension + if( eindex >= 0 && enabled_format( -1 ) ) // open uncompressed version + { + std::string s( name, 0, name.size() - std::strlen( extension_from( eindex ) ) ); + s += extension_to( eindex ); + const int infd = open( s.c_str(), O_RDONLY | O_BINARY ); + if( infd >= 0 ) { name = s; return infd; } + } + const int eformat = extension_format( eindex ); + for( int i = 0; i < num_formats; ++i ) // search compressed version + { + const int format_index = format_order[i]; + if( eformat != format_index && enabled_format( format_index ) ) + { + std::string s( name, 0, name.size() - std::strlen( extension_from( eindex ) ) ); + s += simple_extensions[format_index]; + const int infd = open( s.c_str(), O_RDONLY | O_BINARY ); + if( infd >= 0 ) { name = s; return infd; } + } + } + return -1; + } + + +void parse_format_types2( const std::string & arg, const char * const pn, + int format_types[2] ) + { + const unsigned i = std::min( arg.find( ',' 
), arg.size() ); + if( i != std::min( arg.rfind( ',' ), arg.size() ) ) + { show_option_error( arg.c_str(), "Too many formats in", pn ); + std::exit( 1 ); } + format_types[0] = + ( i > 0 ) ? parse_format_type( arg.substr( 0, i ), pn ) : -1; + format_types[1] = + ( i + 1 < arg.size() ) ? parse_format_type( arg.substr( i + 1 ), pn ) : -1; + } + + +bool check_identical( const char * const name1, const char * const name2 ) + { + if( std::strcmp( name1, name2 ) == 0 ) return true; + struct stat stat1, stat2; + if( stat( name1, &stat1 ) || stat( name2, &stat2 ) ) return false; + return ( stat1.st_ino == stat2.st_ino && stat1.st_dev == stat2.st_dev ); + } diff --git a/zdiff.cc b/zdiff.cc new file mode 100644 index 0000000..a601459 --- /dev/null +++ b/zdiff.cc @@ -0,0 +1,446 @@ +/* Zdiff - decompress and compare two files line by line + Copyright (C) 2010-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cctype> +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined __MSVCRT__ || defined __OS2__ +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + + +namespace { + +std::string fifonames[2]; // names of the two fifos passed to diff + +#include "zcmpdiff.cc" + +void show_help() + { + std::printf( "zdiff compares two files and, if they differ, writes to standard output the\n" + "differences line by line. A hyphen '-' used as a file argument means standard\n" + "input. If any file given is compressed, its decompressed content is used.\n" + "zdiff is a front end to the program diff and has the limitation that messages\n" + "from diff refer to temporary file names instead of those specified.\n" + "\n'zdiff -v -V' prints the version of the diff program used.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" + "\nUsage: zdiff [options] file1 [file2]\n" + "\nzdiff compares file1 to file2. The standard input is used only if file1 or\n" + "file2 refers to standard input. 
If file2 is omitted zdiff tries to compare\n" + "file1 with the corresponding uncompressed file (if file1 is compressed), and\n" + "then with the corresponding compressed files of the remaining formats until\n" + "one is found.\n" + "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n" + "Some options only work if the diff program used supports them.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --text treat all files as text\n" + " -b, --ignore-space-change ignore changes in the amount of white space\n" + " -B, --ignore-blank-lines ignore changes whose lines are all blank\n" + " -c use the context output format\n" + " -C, --context=<n> same as -c but use <n> lines of context\n" + " -d, --minimal try hard to find a smaller set of changes\n" + " -E, --ignore-tab-expansion ignore changes due to tab expansion\n" + " -i, --ignore-case ignore case differences\n" + " -M, --format=<list> process only the formats in <list>\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=[<f1>][,<f2>] force one or both input formats\n" + " -p, --show-c-function show which C function each change is in\n" + " -q, --brief output only whether files differ\n" + " -s, --report-identical-files report when two files are identical\n" + " -t, --expand-tabs expand tabs to spaces in output\n" + " -T, --initial-tab make tabs line up by prepending a tab\n" + " -u use the unified output format\n" + " -U, --unified=<n> same as -u but use <n> lines of context\n" + " -v, --verbose verbose mode (for --version)\n" + " -w, --ignore-all-space ignore all white space\n" + " -W, --width=<n> output at most <n> print columns (for -y)\n" + " -y, --side-by-side output in two columns\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip 
format\n" + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', 'zst',\n" + "and 'un' for uncompressed.\n" ); + show_help_addr(); + } + + +const char * my_basename( const char * filename ) + { + const char * c = filename; + while( *c ) { if( *c == '/' ) { filename = c + 1; } ++c; } + return filename; + } + + +extern "C" void remove_fifos() + { + if( fifonames[0].size() ) + { std::remove( fifonames[0].c_str() ); fifonames[0].clear(); } + if( fifonames[1].size() ) + { std::remove( fifonames[1].c_str() ); fifonames[1].clear(); } + } + + +/* Set fifonames[i] to "${TMPDIR}/<coded_pid>[_-]<basename(filenames[i])>" + and create FIFOs. The pid is coded in little endian order. +*/ +bool set_fifonames( const std::string filenames[2] ) + { + enum { num_codes = 36 }; + const char * const codes = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char * p = std::getenv( "TMPDIR" ); + + if( p ) { fifonames[0] = p; fifonames[0] += '/'; } + else fifonames[0] = "/tmp/"; + unsigned n = getpid(); + do fifonames[0] += codes[n % num_codes]; while( n /= num_codes ); + const unsigned pos = fifonames[0].size(); + fifonames[0] += '_'; + fifonames[1] = fifonames[0]; + fifonames[0] += my_basename( filenames[0].c_str() ); + fifonames[1] += my_basename( filenames[1].c_str() ); + if( fifonames[1] == fifonames[0] ) fifonames[1][pos] = '-'; + + for( int i = 0; i < 2; ++i ) + if( mkfifo( fifonames[i].c_str(), S_IRUSR | S_IWUSR ) != 0 ) + { + if( errno == EEXIST ) + { + std::remove( fifonames[i].c_str() ); + if( mkfifo( fifonames[i].c_str(), S_IRUSR | S_IWUSR ) == 0 ) + continue; + } + show_file_error( fifonames[i].c_str(), "Can't create FIFO", errno ); + return false; + } + return true; + } + + +bool set_data_feeder( const std::string & filename, + const std::string & fifoname, const int infd, + Children & children, int format_index ) + { + uint8_t 
magic_data[magic_buf_size]; + int magic_size = 0; + if( format_index < 0 ) + format_index = test_format( infd, magic_data, &magic_size ); + children.compressor_name = get_compressor_name( format_index ); + + if( children.compressor_name ) // compressed + { + int fda[2]; // pipe from feeder to compressor + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const pid_t pid = fork(); + if( pid == 0 ) // child 1 (compressor feeder) + { + if( close( fda[0] ) != 0 || + !feed_data( filename, infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) + { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child 2 (compressor) + { + const int outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY ); + if( outfd < 0 ) + { show_file_error( fifoname.c_str(), "Can't open FIFO for writing", + errno ); _exit( 2 ); } + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + dup2( outfd, STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 && + close( outfd ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+3]; + argv[0] = children.compressor_name; + for( int i = 0; i < size; ++i ) + argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = ( verbosity >= 0 ) ? 
"-d" : "-dq"; + argv[size+2] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( children.compressor_name ); + _exit( 2 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( children.compressor_name ); return false; } + + close( fda[0] ); close( fda[1] ); + children.pid[0] = pid; + children.pid[1] = pid2; + } + else // uncompressed + { + const pid_t pid = fork(); + if( pid == 0 ) // child (feeder) + { + const int outfd = open( fifoname.c_str(), O_WRONLY | O_BINARY ); + if( outfd < 0 ) + { show_file_error( fifoname.c_str(), "Can't open FIFO for writing", + errno ); _exit( 2 ); } + if( !feed_data( filename, infd, outfd, magic_data, magic_size ) ) + _exit( 2 ); + if( close( outfd ) != 0 ) + { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + children.pid[0] = pid; + children.pid[1] = 0; + } + return true; + } + + +extern "C" void signal_handler( int sig ) + { + remove_fifos(); + std::signal( sig, SIG_DFL ); + std::raise( sig ); + } + + +void set_signals() + { + std::signal( SIGHUP, signal_handler ); + std::signal( SIGINT, signal_handler ); + std::signal( SIGTERM, signal_handler ); + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; + std::vector< const char * > diff_args; // args to diff, maybe empty + int format_types[2] = { -1, -1 }; // < 0 means undefined + program_name = "zdiff"; + invocation_name = ( argc > 0 ) ? 
argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'a', "text", Arg_parser::no }, + { 'b', "ignore-space-change", Arg_parser::no }, + { 'B', "ignore-blank-lines", Arg_parser::no }, + { 'c', 0, Arg_parser::no }, + { 'C', "context", Arg_parser::yes }, + { 'd', "minimal", Arg_parser::no }, + { 'E', "ignore-tab-expansion", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'i', "ignore-case", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'p', "show-c-function", Arg_parser::no }, + { 'q', "brief", Arg_parser::no }, + { 's', "report-identical-files", Arg_parser::no }, + { 't', "expand-tabs", Arg_parser::no }, + { 'T', "initial-tab", Arg_parser::no }, + { 'u', 0, Arg_parser::no }, + { 'U', "unified", Arg_parser::yes }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { 'w', "ignore-all-space", Arg_parser::no }, + { 'W', "width", Arg_parser::yes }, + { 'y', "side-by-side", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & sarg = parser.argument( argind ); + const char * const arg = sarg.c_str(); + switch( code ) + { + case 'a': diff_args.push_back( "-a" ); break; + case 'b': diff_args.push_back( "-b" ); break; + case 'B': diff_args.push_back( "-B" ); break; + case 'c': diff_args.push_back( "-c" ); break; 
+ case 'C': diff_args.push_back( "-C" ); diff_args.push_back( arg ); break; + case 'd': diff_args.push_back( "-d" ); break; + case 'E': diff_args.push_back( "-E" ); break; + case 'h': show_help(); return 0; + case 'i': diff_args.push_back( "-i" ); break; + case 'M': parse_format_list( sarg, pn ); break; + case 'N': break; + case 'O': parse_format_types2( sarg, pn, format_types ); break; + case 'p': diff_args.push_back( "-p" ); break; + case 'q': diff_args.push_back( "-q" ); break; + case 's': diff_args.push_back( "-s" ); break; + case 't': diff_args.push_back( "-t" ); break; + case 'T': diff_args.push_back( "-T" ); break; + case 'u': diff_args.push_back( "-u" ); break; + case 'U': diff_args.push_back( "-U" ); diff_args.push_back( arg ); break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version( DIFF " --version" ); return 0; + case 'w': diff_args.push_back( "-w" ); break; + case 'W': diff_args.push_back( "-W" ); diff_args.push_back( arg ); break; + case 'y': diff_args.push_back( "-y" ); break; + case bz2_opt: parse_compressor( sarg, pn, fmt_bz2 ); break; + case gz_opt: parse_compressor( sarg, pn, fmt_gz ); break; + case lz_opt: parse_compressor( sarg, pn, fmt_lz ); break; + case xz_opt: parse_compressor( sarg, pn, fmt_xz ); break; + case zst_opt: parse_compressor( sarg, pn, fmt_zst ); break; + default: internal_error( "uncaught option." 
); + } + } // end process options + +#if defined __MSVCRT__ || defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + const int files = parser.arguments() - argind; + if( files < 1 ) { show_error( "No files given.", 0, true ); return 2; } + if( files > 2 ) { show_error( "Too many files.", 0, true ); return 2; } + + std::string filenames[2]; // file names of the two input files + filenames[0] = parser.argument( argind ); + if( files == 2 ) filenames[1] = parser.argument( argind + 1 ); + + int infd[2]; // file descriptors of the two files + infd[0] = ( filenames[0] == "-" ) ? + STDIN_FILENO : open_instream( filenames[0] ); + if( infd[0] < 0 ) return 2; + + if( files == 2 ) + { + if( check_identical( filenames[0].c_str(), filenames[1].c_str() ) ) + return 0; + infd[1] = ( filenames[1] == "-" ) ? + STDIN_FILENO : open_instream( filenames[1] ); + if( infd[1] < 0 ) return 2; + } + else + { + if( filenames[0] == "-" ) + { show_error( "Missing operand after '-'.", 0, true ); return 2; } + if( format_types[0] >= 0 || format_types[1] >= 0 ) + { show_error( "Two files must be given when format is specified.", 0, true ); + return 2; } + filenames[1] = filenames[0]; + infd[1] = open_other_instream( filenames[1] ); + if( infd[1] < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Can't find file to compare with '%s'.\n", + program_name, filenames[0].c_str() ); + show_error( 0, 0, true ); return 2; + } + } + + std::atexit( remove_fifos ); + set_signals(); + if( !set_fifonames( filenames ) ) return 2; + + Children children[2]; + if( !set_data_feeder( filenames[0], fifonames[0], infd[0], children[0], + format_types[0] ) || + !set_data_feeder( filenames[1], fifonames[1], infd[1], children[1], + format_types[1] ) ) + return 2; + + const pid_t diff_pid = fork(); + if( diff_pid == 0 ) // child (diff) + { + const char ** const argv = new const char *[diff_args.size()+5]; + argv[0] = DIFF; + for( unsigned i = 0; i < 
diff_args.size(); ++i ) + argv[i+1] = diff_args[i]; + argv[diff_args.size()+1] = "--"; + argv[diff_args.size()+2] = fifonames[0].c_str(); + argv[diff_args.size()+3] = fifonames[1].c_str(); + argv[diff_args.size()+4] = 0; + execvp( argv[0], (char **)argv ); + show_exec_error( DIFF ); + _exit( 2 ); + } + if( diff_pid < 0 ) // parent + { show_fork_error( DIFF ); return 2; } + + int retval = wait_for_child( diff_pid, DIFF ); + + for( int i = 0; i < 2; ++i ) + { + int infd; // fifo from decompressor + do infd = open( fifonames[i].c_str(), O_RDONLY | O_NONBLOCK | O_BINARY ); + while( infd < 0 && errno == EINTR ); + bool finished = false; // set to true if fifo is empty and at EOF + if( infd >= 0 ) + { + uint8_t b; + if( readblock( infd, &b, 1 ) <= 0 && errno == 0 ) finished = true; + close( infd ); + } + if( !good_status( children[i], finished ) ) retval = 2; + } + + for( int i = 0; i < 2; ++i ) + if( filenames[i] != "-" && close( infd[i] ) != 0 ) + { + show_file_error( filenames[i].c_str(), "Error closing input file", errno ); + retval = 2; + } + + return retval; + } diff --git a/zegrep.in b/zegrep.in new file mode 100644 index 0000000..0cac12e --- /dev/null +++ b/zegrep.in @@ -0,0 +1,3 @@ +#! /bin/sh +bindir=`echo "$0" | sed -e 's,[^/]*$,,'` +exec "${bindir}"zgrep -E "$@" diff --git a/zfgrep.in b/zfgrep.in new file mode 100644 index 0000000..c1a96d8 --- /dev/null +++ b/zfgrep.in @@ -0,0 +1,3 @@ +#! /bin/sh +bindir=`echo "$0" | sed -e 's,[^/]*$,,'` +exec "${bindir}"zgrep -F "$@" diff --git a/zgrep.cc b/zgrep.cc new file mode 100644 index 0000000..8f4bc9d --- /dev/null +++ b/zgrep.cc @@ -0,0 +1,417 @@ +/* Zgrep - search compressed files for a regular expression + Copyright (C) 2010-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined __MSVCRT__ || defined __OS2__ +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + + +namespace { + +#include "recursive.cc" +#include "zcatgrep.cc" + +void show_help() + { + std::printf( "zgrep is a front end to the program grep that allows transparent search\n" + "on any combination of compressed and uncompressed files. If any file\n" + "given is compressed, its decompressed content is used. If a file given\n" + "does not exist, and its name does not end with one of the known\n" + "extensions, zgrep tries the compressed file names corresponding to the\n" + "formats supported until one is found. If a file fails to decompress, zgrep\n" + "continues searching the rest of the files.\n" + "\nIf a file is specified as '-', data are read from standard input,\n" + "decompressed if needed, and fed to grep. 
Data read from standard input\n" + "must be of the same type; all uncompressed or all in the same\n" + "compressed format.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches read standard input.\n" + "\n'zgrep --verbose -V' prints the version of the grep program used.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" + "\nUsage: zgrep [options] <pattern> [files]\n" + "\nExit status is 0 if match, 1 if no match, 2 if trouble.\n" + "Some options only work if the grep program used supports them.\n" + "\nOptions:\n" + " --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -a, --text treat all files as text\n" + " -A, --after-context=<n> print <n> lines of trailing context\n" + " -b, --byte-offset print the byte offset of each line\n" + " -B, --before-context=<n> print <n> lines of leading context\n" + " -c, --count only print a count of matching lines per file\n" + " -C, --context=<n> print <n> lines of output context\n" + " --color[=<when>] show matched strings in color\n" + " -e, --regexp=<pattern> use <pattern> as the pattern to match\n" + " -E, --extended-regexp <pattern> is an extended regular expression\n" + " -f, --file=<file> obtain patterns from <file>\n" + " -F, --fixed-strings <pattern> is a set of newline-separated strings\n" + " -G, --basic-regexp <pattern> is a basic regular expression (default)\n" + " -h, --no-filename suppress the prefixing file name on output\n" + " -H, --with-filename print the file name for each match\n" + " -i, --ignore-case ignore case distinctions\n" + " -I ignore binary files\n" + " -l, --files-with-matches only print names of files containing matches\n" + " -L, --files-without-match only print names of files containing no matches\n" + " --label=<label> use <label> as file name for standard input\n" + " --line-buffered flush output on every line\n" + " -m, --max-count=<n> stop after <n> 
matches\n" + " -M, --format=<list> process only the formats in <list>\n" + " -n, --line-number print the line number of each line\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -o, --only-matching show only the part of a line matching <pattern>\n" + " -O, --force-format=<fmt> force the input format\n" + " -P, --perl-regexp <pattern> is a Perl regular expression\n" + " -q, --quiet, --silent suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -s, --no-messages suppress error messages\n" + " -T, --initial-tab make tabs line up (if needed)\n" + " -U, --binary don't strip CR characters at EOL (DOS/Windows)\n" + " -v, --invert-match select non-matching lines\n" + " --verbose verbose mode (show error messages)\n" + " -w, --word-regexp match only whole words\n" + " -x, --line-regexp match only whole lines\n" + " -Z, --null print 0 byte (ASCII NUL) after file name\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', 'zst',\n" + "and 'un' for uncompressed.\n" + "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n" + "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" ); + show_help_addr(); + } + + +int zgrep_file( int infd, const int format_index, + const std::string & input_filename, + const std::vector< const char * > & grep_args, + const int list_mode, const bool initial_tab, + const bool line_buffered, const bool show_name, + const bool z_null ) + { + Children children; + if( !set_data_feeder( input_filename, &infd, children, format_index ) ) + return 2; + int 
fda[2]; // pipe from grep + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return 2; } + const pid_t grep_pid = fork(); + if( grep_pid == 0 ) // child (grep) + { + if( dup2( infd, STDIN_FILENO ) >= 0 && + dup2( fda[1], STDOUT_FILENO ) >= 0 && + close( infd ) == 0 && close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const char ** const argv = new const char *[grep_args.size()+2]; + argv[0] = GREP; + for( unsigned i = 0; i < grep_args.size(); ++i ) + argv[i+1] = grep_args[i]; + argv[grep_args.size()+1] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( GREP ); + _exit( 2 ); + } + if( grep_pid < 0 ) // parent + { show_fork_error( GREP ); return 2; } + + close( fda[1] ); + enum { buffer_size = 256 }; + uint8_t buffer[buffer_size]; + bool line_begin = true; + bool at_eof = false; + while( !at_eof ) + { + int size; + bool error = false; + if( line_buffered ) + for( size = 0; size < buffer_size; ) + { if( readblock( fda[0], buffer + size, 1 ) == 1 ) + { ++size; if( buffer[size-1] == '\n' ) break; } + else { at_eof = true; if( errno ) { error = true; } break; } } + else + { size = readblock( fda[0], buffer, buffer_size ); + if( size < buffer_size ) { at_eof = true; if( errno ) error = true; } } + if( error ) + { std::fflush( stdout ); show_error( "Read error", errno ); return 2; } + if( size > 0 && !list_mode ) + { + if( show_name ) // print the file name for each match + for( int i = 0; i < size; ++i ) + { + if( line_begin ) + { line_begin = false; + const int len = std::printf( "%s%c", input_filename.c_str(), + z_null ? 
0 : ':' ); + if( initial_tab && len > 0 && len % 8 ) putchar( '\t' ); } + putchar( buffer[i] ); + if( buffer[i] == '\n' ) + { line_begin = true; if( line_buffered ) std::fflush( stdout ); } + } + else if( std::fwrite( buffer, 1, size, stdout ) != (unsigned)size ) + { std::fflush( stdout ); show_error( "Write error", errno ); return 2; } + } + } + std::fflush( stdout ); + + int retval = wait_for_child( grep_pid, GREP ); + + if( !good_status( children, retval == 1 ) ) retval = 2; + + if( list_mode && (retval == 0) == (list_mode == 1) ) + { std::printf( "%s%c", input_filename.c_str(), z_null ? 0 : '\n' ); + std::fflush( stdout ); } + if( close( infd ) != 0 ) + { show_close_error(); return 2; } + if( close( fda[0] ) != 0 ) + { show_close_error( GREP ); return 2; } + return retval; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { help_opt = 256, verbose_opt, color_opt, label_opt, linebuf_opt, + bz2_opt, gz_opt, lz_opt, xz_opt, zst_opt }; + int format_index = -1; // undefined + int list_mode = 0; // 1 = list matches, -1 = list non-matches + int recursive = 0; // 1 = '-r', 2 = '-R' + int show_name = -1; // tri-state bool + bool initial_tab = false; + bool line_buffered = false; + bool no_messages = false; + bool z_null = false; // for '-Z, --null' + std::list< std::string > filenames; + std::vector< const char * > grep_args; // args to grep, maybe empty + std::string color_option; // additional args to grep + std::string label_option; + const char * label = "(standard input)"; // prefix for standard input + program_name = "zgrep"; + invocation_name = ( argc > 0 ) ? 
argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'a', "text", Arg_parser::no }, // grep GNU + { 'A', "after-context", Arg_parser::yes }, // grep GNU + { 'b', "byte-offset", Arg_parser::no }, // grep GNU + { 'B', "before-context", Arg_parser::yes }, // grep GNU + { 'c', "count", Arg_parser::no }, // grep + { 'C', "context", Arg_parser::yes }, // grep GNU + { 'e', "regexp", Arg_parser::yes }, // grep + { 'E', "extended-regexp", Arg_parser::no }, // grep + { 'f', "file", Arg_parser::yes }, // grep + { 'F', "fixed-strings", Arg_parser::no }, // grep + { 'G', "basic-regexp", Arg_parser::no }, // grep GNU + { 'h', "no-filename", Arg_parser::no }, // grep GNU + { 'H', "with-filename", Arg_parser::no }, // grep GNU + { 'i', "ignore-case", Arg_parser::no }, // grep + { 'I', 0, Arg_parser::no }, // grep GNU + { 'l', "files-with-matches", Arg_parser::no }, // grep + { 'L', "files-without-match", Arg_parser::no }, // grep GNU + { 'm', "max-count", Arg_parser::yes }, // grep GNU + { 'M', "format", Arg_parser::yes }, + { 'n', "line-number", Arg_parser::no }, // grep + { 'N', "no-rcfile", Arg_parser::no }, + { 'o', "only-matching", Arg_parser::no }, // grep + { 'O', "force-format", Arg_parser::yes }, + { 'P', "perl-regexp", Arg_parser::no }, // grep GNU + { 'q', "quiet", Arg_parser::no }, + { 'q', "silent", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 's', "no-messages", Arg_parser::no }, // grep + { 'T', "initial-tab", Arg_parser::no }, // grep GNU + { 'U', "binary", Arg_parser::no }, // grep GNU + { 'v', "invert-match", Arg_parser::no }, // grep + { 'V', "version", Arg_parser::no }, + { 'w', "word-regexp", Arg_parser::no }, // grep GNU + { 'x', "line-regexp", Arg_parser::no }, // grep + { 'Z', "null", Arg_parser::no }, // grep GNU + { help_opt, "help", Arg_parser::no }, + { verbose_opt, "verbose", Arg_parser::no }, + { color_opt, "color", Arg_parser::maybe }, + { label_opt, "label", 
Arg_parser::yes }, + { linebuf_opt, "line-buffered", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + bool pattern_found = false; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & sarg = parser.argument( argind ); + const char * const arg = sarg.c_str(); + switch( code ) + { + case 'a': grep_args.push_back( "-a" ); break; + case 'A': grep_args.push_back( "-A" ); grep_args.push_back( arg ); break; + case 'b': grep_args.push_back( "-b" ); break; + case 'B': grep_args.push_back( "-B" ); grep_args.push_back( arg ); break; + case 'c': grep_args.push_back( "-c" ); break; + case 'C': grep_args.push_back( "-C" ); grep_args.push_back( arg ); break; + case 'e': grep_args.push_back( "-e" ); grep_args.push_back( arg ); + pattern_found = true; break; + case 'E': grep_args.push_back( "-E" ); break; + case 'f': grep_args.push_back( "-f" ); grep_args.push_back( arg ); + pattern_found = true; break; + case 'F': grep_args.push_back( "-F" ); break; + case 'G': grep_args.push_back( "-G" ); break; + case 'h': show_name = false; break; + case 'H': show_name = true; break; + case 'i': grep_args.push_back( "-i" ); break; + case 'I': grep_args.push_back( "-I" ); break; + case 'l': grep_args.push_back( "-l" ); list_mode = 1; break; + case 'L': grep_args.push_back( "-L" ); list_mode = -1; break; + case 'm': grep_args.push_back( "-m" ); grep_args.push_back( arg ); break; + case 'M': parse_format_list( sarg, pn ); break; + 
case 'n': grep_args.push_back( "-n" ); break; + case 'N': break; + case 'o': grep_args.push_back( "-o" ); break; + case 'O': format_index = parse_format_type( sarg, pn ); break; + case 'P': grep_args.push_back( "-P" ); break; + case 'q': grep_args.push_back( "-q" ); verbosity = -1; break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 's': grep_args.push_back( "-s" ); no_messages = true; break; + case 'T': grep_args.push_back( "-T" ); initial_tab = true; break; + case 'U': grep_args.push_back( "-U" ); break; + case 'v': grep_args.push_back( "-v" ); break; + case 'V': show_version( GREP " --version" ); return 0; + case 'w': grep_args.push_back( "-w" ); break; + case 'x': grep_args.push_back( "-x" ); break; + case 'Z': z_null = true; break; + case help_opt: show_help(); return 0; + case verbose_opt: no_messages = false; if( verbosity < 4 ) ++verbosity; + break; + case color_opt: color_option = "--color"; + if( !sarg.empty() ) { color_option += '='; color_option += sarg; } + break; + case label_opt: label_option = "--label="; label_option += sarg; + label = arg; break; + case linebuf_opt: grep_args.push_back( "--line-buffered" ); + line_buffered = true; break; + case bz2_opt: parse_compressor( sarg, pn, fmt_bz2 ); break; + case gz_opt: parse_compressor( sarg, pn, fmt_gz ); break; + case lz_opt: parse_compressor( sarg, pn, fmt_lz ); break; + case xz_opt: parse_compressor( sarg, pn, fmt_xz ); break; + case zst_opt: parse_compressor( sarg, pn, fmt_zst ); break; + default: internal_error( "uncaught option." 
); + } + } // end process options + + if( !color_option.empty() ) // push the last value set + grep_args.push_back( color_option.c_str() ); + if( !label_option.empty() ) // for "Binary file <label> matches" + grep_args.push_back( label_option.c_str() ); + +#if defined __MSVCRT__ || defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + if( !pattern_found ) + { + if( argind >= parser.arguments() ) + { show_error( "Pattern not found." ); return 2; } + const std::string & pat = parser.argument( argind++ ); + if( pat.size() && pat[0] == '-' ) grep_args.push_back( "-e" ); + grep_args.push_back( pat.c_str() ); + } + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" ); + + if( show_name < 0 ) show_name = ( filenames.size() != 1 || recursive ); + + std::string input_filename; + int retval = 1; + bool error = false; + bool stdin_used = false; + while( next_filename( filenames, input_filename, error, recursive, + false, no_messages ) ) + { + int infd; + if( input_filename == "." 
) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; input_filename = label; + } + else + { + infd = open_instream( input_filename, format_index < 0, no_messages ); + if( infd < 0 ) { error = true; continue; } + } + + const int tmp = zgrep_file( infd, format_index, input_filename, grep_args, + list_mode, initial_tab, line_buffered, + show_name, z_null ); + if( tmp == 0 || ( tmp == 2 && retval == 1 ) ) retval = tmp; + + if( close( infd ) != 0 ) + { show_file_error( input_filename.c_str(), "Error closing input file", + errno ); error = true; } + if( retval == 0 && verbosity < 0 ) break; + } + + if( std::fclose( stdout ) != 0 ) + { + show_error( "Error closing stdout", errno ); + error = true; + } + if( error && ( retval != 0 || verbosity >= 0 ) ) retval = 2; + return retval; + } diff --git a/ztest.cc b/ztest.cc new file mode 100644 index 0000000..5f74c20 --- /dev/null +++ b/ztest.cc @@ -0,0 +1,369 @@ +/* Ztest - check the integrity of compressed files + Copyright (C) 2010-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <sys/stat.h> +#if defined __MSVCRT__ || defined __OS2__ +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" +#include "zutils.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +namespace { + +#include "recursive.cc" + +void show_help() + { + std::printf( "ztest checks the integrity of the compressed files specified. It\n" + "also warns if an uncompressed file has a compressed file name extension, or\n" + "if a compressed file has a wrong compressed extension. Uncompressed files\n" + "are otherwise ignored. If a file is specified as '-', the integrity of\n" + "compressed data read from standard input is checked. Data read from\n" + "standard input must be all in the same compressed format. If a file fails to\n" + "decompress, does not exist, can't be opened, or is a terminal, ztest\n" + "continues testing the rest of the files. A final diagnostic is shown at\n" + "verbosity level 1 or higher if any file fails the test when testing multiple\n" + "files.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches read standard input.\n" + "\nThe formats supported are bzip2, gzip, lzip, xz, and zstd.\n" + "\nNote that error detection in the xz format is broken. First, some xz files\n" + "lack integrity information. Second, not all xz decompressors can check the\n" + "integrity of all xz files. Third, section 2.1.1.2 'Stream Flags' of the\n" + "xz format specification allows xz decompressors to produce garbage output\n" + "without issuing any warning. 
Therefore, xz files can't always be checked as\n" + "reliably as files in the other formats can.\n" + "\nUsage: ztest [options] [files]\n" + "\nExit status is 0 if all compressed files check OK, 1 if environmental\n" + "problems (file not found, invalid command-line options, I/O errors, etc),\n" + "2 if any compressed file is corrupt or invalid, or if any file has an\n" + "incorrect file name extension.\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -M, --format=<list> process only the formats in <list>\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -O, --force-format=<fmt> force the input format\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for options '-M' and '-O' are 'bz2', 'gz', 'lz', 'xz', and 'zst'.\n" ); + show_help_addr(); + } + + +int open_instream( const std::string & input_filename ) + { + const int infd = open( input_filename.c_str(), O_RDONLY | O_BINARY ); + if( infd < 0 ) + show_file_error( input_filename.c_str(), "Can't open input file", errno ); + return infd; + } + + +int ztest_stdin( const int infd, int format_index, + const std::vector< const char * > & ztest_args ) + { + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + if( format_index < 0 ) + format_index = test_format( infd, magic_data, &magic_size ); + const char * const compressor_name = get_compressor_name( format_index ); + if( !compressor_name ) + { show_error( 
"Unknown data format read from stdin." ); return 2; } + int fda[2]; // pipe from feeder + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return 1; } + + const pid_t pid = fork(); + if( pid == 0 ) // child1 (compressor feeder) + { + if( close( fda[0] ) != 0 || + !feed_data( "-", infd, fda[1], magic_data, magic_size ) ) _exit( 1 ); + if( close( fda[1] ) != 0 ) { show_close_error(); _exit( 1 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return 1; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child2 (compressor) + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const int size2 = ztest_args.size(); + const char ** const argv = new const char *[size+size2+3]; + argv[0] = compressor_name; + for( int i = 0; i < size; ++i ) + argv[i+1] = compressor_args[i].c_str(); + for( int i = 0; i < size2; ++i ) + argv[i+size+1] = ztest_args[i]; + argv[size+size2+1] = "-t"; + argv[size+size2+2] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( compressor_name ); + _exit( 1 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( compressor_name ); return 1; } + + close( fda[0] ); close( fda[1] ); + const bool isgzxz = ( format_index == fmt_gz || format_index == fmt_xz ); + int retval = wait_for_child( pid2, compressor_name, 1, isgzxz ); + if( retval == 0 && wait_for_child( pid, "data feeder" ) != 0 ) + retval = 1; + return retval; + } + + +int ztest_file( const int infd, int format_index, + const std::string & input_filename, + const std::vector< const char * > & ztest_args ) + { + // bzip2, gzip, and lzip are the primary formats. xz and zstd are optional. 
+ static int disable_xz = -1; // tri-state bool + static int disable_zst = -1; // tri-state bool + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + const int format_index_e = test_extension( input_filename ); + if( format_index < 0 ) + format_index = test_format( infd, magic_data, &magic_size ); + const char * const compressor_name = get_compressor_name( format_index ); + if( !compressor_name ) + { + if( format_index < 0 && format_index_e >= 0 ) + { show_file_error( input_filename.c_str(), + "Uncompressed file has compressed extension." ); return 2; } + return 0; // ignore this file + } + if( format_index == fmt_xz ) + { + if( disable_xz < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_xz = ( std::system( command.c_str() ) != 0 ); + if( disable_xz && verbosity >= 2 ) + std::fprintf( stderr, "%s: '%s' not found. Ignoring xz files.\n", + program_name, compressor_name ); + } + if( disable_xz ) return 0; // ignore this file if no xz installed + } + else if( format_index == fmt_zst ) + { + if( disable_zst < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_zst = ( std::system( command.c_str() ) != 0 ); + if( disable_zst && verbosity >= 2 ) + std::fprintf( stderr, "%s: '%s' not found. 
Ignoring zstd files.\n", + program_name, compressor_name ); + } + if( disable_zst ) return 0; // ignore this file if no zstd installed + } + + const pid_t pid = fork(); + + if( pid == 0 ) // child (compressor) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const int size2 = ztest_args.size(); + const char ** const argv = new const char *[size+size2+5]; + argv[0] = compressor_name; + for( int i = 0; i < size; ++i ) + argv[i+1] = compressor_args[i].c_str(); + for( int i = 0; i < size2; ++i ) + argv[i+size+1] = ztest_args[i]; + argv[size+size2+1] = "-t"; + argv[size+size2+2] = "--"; + argv[size+size2+3] = input_filename.c_str(); + argv[size+size2+4] = 0; + execvp( argv[0], (char **)argv ); + show_exec_error( compressor_name ); + _exit( 1 ); + } + if( pid < 0 ) // parent + { show_fork_error( compressor_name ); return 1; } + + const bool isgzxz = ( format_index == fmt_gz || format_index == fmt_xz ); + int retval = wait_for_child( pid, compressor_name, 1, isgzxz ); + if( retval == 0 && format_index >= 0 && format_index_e >= 0 && + format_index != format_index_e ) + { show_file_error( input_filename.c_str(), + "Compressed file has wrong compressed extension." ); retval = 2; } + return retval; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; + int format_index = -1; // undefined + int recursive = 0; // 1 = '-r', 2 = '-R' + std::list< std::string > filenames; + std::vector< const char * > ztest_args; // args to ztest, maybe empty + program_name = "ztest"; + invocation_name = ( argc > 0 ) ? 
argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { 'h', "help", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'O', "force-format", Arg_parser::yes }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case 'h': show_help(); return 0; + case 'M': parse_format_list( arg, pn ); break; + case 'N': break; + case 'O': format_index = parse_format_type( arg, pn, false ); break; + case 'q': verbosity = -1; ztest_args.push_back( "-q" ); break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 'v': if( verbosity < 4 ) ++verbosity; + ztest_args.push_back( "-v" ); break; + case 'V': show_version(); return 0; + case bz2_opt: parse_compressor( arg, pn, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, pn, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, pn, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, pn, fmt_xz, 1 ); break; + case zst_opt: parse_compressor( arg, pn, fmt_zst, 1 ); break; + default: internal_error( "uncaught option." 
); + } + } // end process options + +#if defined __MSVCRT__ || defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + for( ; argind < parser.arguments(); ++argind ) + filenames.push_back( parser.argument( argind ) ); + + if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" ); + + std::string input_filename; + int files_tested = 0, failed_tests = 0; + int retval = 0; + bool error = false; + bool stdin_used = false; + while( next_filename( filenames, input_filename, error, recursive ) ) + { + int infd; + if( input_filename == "." ) + { + if( stdin_used ) continue; else stdin_used = true; + infd = STDIN_FILENO; input_filename = "-"; + } + else + { + infd = open_instream( input_filename ); + if( infd < 0 ) { error = true; continue; } + } + + if( isatty( infd ) ) // for example /dev/tty + { + show_file_error( name_or_stdin( input_filename.c_str() ), + "I won't read compressed data from a terminal." ); + close( infd ); error = true; continue; + } + + int tmp; + if( infd == STDIN_FILENO ) + tmp = ztest_stdin( infd, format_index, ztest_args ); + else tmp = ztest_file( infd, format_index, input_filename, ztest_args ); + if( tmp > retval ) retval = tmp; + ++files_tested; if( tmp ) ++failed_tests; + + if( close( infd ) != 0 ) + { show_file_error( input_filename.c_str(), "Error closing input file", + errno ); error = true; } + } + + if( std::fclose( stdout ) != 0 ) // in case decompressor writes to stdout + { + show_error( "Error closing stdout", errno ); + error = true; + } + if( error && retval == 0 ) retval = 1; + if( failed_tests > 0 && verbosity >= 1 && files_tested > 1 ) + std::fprintf( stderr, "%s: warning: %d %s failed the test.\n", + program_name, failed_tests, + ( failed_tests == 1 ) ? 
"file" : "files" ); + return retval; + } diff --git a/zupdate.cc b/zupdate.cc new file mode 100644 index 0000000..64ca0d3 --- /dev/null +++ b/zupdate.cc @@ -0,0 +1,506 @@ +/* Zupdate - recompress bzip2, gzip, xz, zstd files to lzip format + Copyright (C) 2013-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <list> +#include <string> +#include <vector> +#include <dirent.h> +#include <fcntl.h> +#include <stdint.h> +#include <unistd.h> +#include <utime.h> +#include <sys/stat.h> +#include <sys/wait.h> +#if defined __MSVCRT__ || defined __OS2__ +#include <io.h> +#endif + +#include "arg_parser.h" +#include "rc.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + + +namespace { + +#include "recursive.cc" + +void show_help() + { + std::printf( "zupdate recompresses files from bzip2, gzip, xz, and zstd formats to lzip\n" + "format. Each original is compared with the new file and then deleted.\n" + "Only regular files with standard file name extensions are recompressed,\n" + "other files are ignored. Compressed files are decompressed and then\n" + "recompressed on the fly; no temporary files are created. 
The lzip format\n" + "is chosen as destination because it is the most appropriate for\n" + "long-term archiving.\n" + "\nIf no files are specified, recursive searches examine the current\n" + "working directory, and nonrecursive searches do nothing.\n" + "\nIf the lzip-compressed version of a file already exists, the file is skipped\n" + "unless the option '--force' is given. In this case, if the comparison with\n" + "the existing lzip version fails, an error is returned and the original file\n" + "is not deleted. The operation of zupdate is meant to be safe and not cause\n" + "any data loss. Therefore, existing lzip-compressed files are never\n" + "overwritten nor deleted.\n" + "\nThe names of the original files must have one of the following extensions:\n" + "\n'.bz2', '.gz', '.xz', '.zst', or '.Z', which are recompressed to '.lz'.\n" + "\n'.tbz', '.tbz2', '.tgz', '.txz', or '.tzst', which are recompressed to '.tlz'.\n" + "\nUsage: zupdate [options] [files]\n" + "\nExit status is 0 if all the compressed files were successfully recompressed\n" + "(if needed), compared, and deleted (if requested). 1 if a non-fatal error\n" + "occurred (file not found or not regular, or has invalid format, or can't be\n" + "deleted). 
2 if a fatal error occurred (invalid command-line options,\n" + "compressor can't be run, or comparison fails).\n" + "\nOptions:\n" + " -h, --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -d, --destdir=<dir> write recompressed files into <dir>\n" + " -e, --expand-extensions expand combined extensions; tgz --> tar.lz\n" + " -f, --force don't skip a file even if the .lz exists\n" + " -i, --ignore-errors ignore non-fatal errors\n" + " -k, --keep keep (don't delete) input files\n" + " -l, --lzip-verbose pass one option -v to the lzip compressor\n" + " -M, --format=<list> process only the formats in <list>\n" + " -N, --no-rcfile don't read runtime configuration file\n" + " -q, --quiet suppress all messages\n" + " -r, --recursive operate recursively on directories\n" + " -R, --dereference-recursive recursively follow symbolic links\n" + " -v, --verbose be verbose (a 2nd -v gives more)\n" + " -0 .. -9 set compression level [default 9]\n" + " --bz2=<command> set compressor and options for bzip2 format\n" + " --gz=<command> set compressor and options for gzip format\n" + " --lz=<command> set compressor and options for lzip format\n" + " --xz=<command> set compressor and options for xz format\n" + " --zst=<command> set compressor and options for zstd format\n" + "\nValid formats for option '-M' are 'bz2', 'gz', 'lz', 'xz', and 'zst'.\n" ); + show_help_addr(); + } + + +void extract_srcdir_name( const std::string & name, std::string & srcdir ) + { + if( name.empty() || name == "." 
) return; // leave srcdir empty + if( name[name.size()-1] == '/' ) // remove last slash + { srcdir.assign( name, 0, name.size() - 1 ); return; } + struct stat st; + if( stat( name.c_str(), &st ) == 0 && S_ISDIR( st.st_mode ) ) + { srcdir = name; return; } + + unsigned size = 0; // size of srcdir without last slash nor basename + for( unsigned i = name.size(); i > 0; --i ) + if( name[i-1] == '/' ) { size = i - 1; break; } + if( size > 0 ) srcdir.assign( name, 0, size ); + } + + +bool make_dirs( const std::string & name ) + { + static std::string cached_dirname; + unsigned i = name.size(); + while( i > 0 && name[i-1] != '/' ) --i; // remove last component + while( i > 0 && name[i-1] == '/' ) --i; // remove slash(es) + if( i == 0 ) return true; // dirname is '/' or empty + const unsigned dirsize = i; // size of dirname without trailing slash(es) + if( cached_dirname.size() == dirsize && + cached_dirname.compare( 0, dirsize, name ) == 0 ) return true; + + for( i = 0; i < dirsize; ) + { + while( i < dirsize && name[i] == '/' ) ++i; + const unsigned first = i; + while( i < dirsize && name[i] != '/' ) ++i; + if( first < i ) + { + const std::string partial( name, 0, i ); + const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; + struct stat st; + if( stat( partial.c_str(), &st ) == 0 ) + { if( !S_ISDIR( st.st_mode ) ) { errno = ENOTDIR; return false; } } + else if( mkdir( partial.c_str(), mode ) != 0 && errno != EEXIST ) + return false; // if EEXIST, another process created the dir + } + } + cached_dirname.assign( name, 0, dirsize ); + return true; + } + + +void cant_execute( const std::string & command, const int status ) + { + if( verbosity >= 0 ) + { + if( WIFEXITED( status ) ) + std::fprintf( stderr, "%s: Error executing '%s'. Exit status = %d\n", + program_name, command.c_str(), WEXITSTATUS( status ) ); + else + std::fprintf( stderr, "%s: Can't execute '%s'\n", + program_name, command.c_str() ); + } + } + + +// Set permissions, owner, and times. 
+void set_permissions( const char * const rname, const struct stat & in_stats ) + { + bool warning = false; + const mode_t mode = in_stats.st_mode; + // chown in many cases returns with EPERM, which can be safely ignored. + if( chown( rname, in_stats.st_uid, in_stats.st_gid ) == 0 ) + { if( chmod( rname, mode ) != 0 ) warning = true; } + else + if( errno != EPERM || + chmod( rname, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) + warning = true; + struct utimbuf t; + t.actime = in_stats.st_atime; + t.modtime = in_stats.st_mtime; + if( utime( rname, &t ) != 0 ) warning = true; + if( warning && verbosity >= 2 ) + show_file_error( rname, + "warning: can't change output file attributes", errno ); + } + + +// Return value: 0 = success, -1 = file skipped, 1 = error, 2 = fatal error. +int zupdate_file( const std::string & name, const char * const lzip_name, + const std::vector< std::string > & lzip_args2, + const std::string & srcdir, const std::string & destdir, + const bool expand, const bool force, + const bool keep_input_files, const bool no_rcfile ) + { + // bzip2, gzip, and lzip are the primary formats. xz and zstd are optional. + static int disable_xz = -1; // tri-state bool + static int disable_zst = -1; // tri-state bool + int format_index = -1; // undefined + std::string rname; // recompressed name + + const int eindex = extension_index( name ); // search extension + if( eindex >= 0 ) + { + format_index = extension_format( eindex ); + if( format_index == fmt_lz ) + { + if( verbosity >= 2 ) + std::fprintf( stderr, "%s: %s: Input file already has '%s' suffix.\n", + program_name, name.c_str(), extension_from( eindex ) ); + return 0; // ignore this file + } + if( destdir.size() ) + { + if( srcdir.size() && name.compare( 0, srcdir.size(), srcdir ) != 0 ) + internal_error( "srcdir mismatch." 
); + rname = destdir; + if( rname[rname.size()-1] != '/' && name[srcdir.size()] != '/' ) + rname += '/'; + rname.append( name, srcdir.size(), name.size() - srcdir.size() - + std::strlen( extension_from( eindex ) ) ); + } + else + rname.assign( name, 0, name.size() - std::strlen( extension_from( eindex ) ) ); + rname += ( std::strcmp( extension_to( eindex ), ".tar" ) == 0 ) ? + ( expand ? ".tar.lz" : ".tlz" ) : ".lz"; + } + const char * const compressor_name = get_compressor_name( format_index ); + if( !compressor_name ) + { + if( verbosity >= 2 ) + show_file_error( name.c_str(), "Unknown extension in file name -- ignored." ); + return 0; // ignore this file + } + + struct stat in_stats; + if( stat( name.c_str(), &in_stats ) != 0 ) // check input file + { show_file_error( name.c_str(), "Can't stat input file", errno ); + return 1; } + if( !S_ISREG( in_stats.st_mode ) ) + { show_file_error( name.c_str(), "Input file is not a regular file." ); + return 1; } + + struct stat st; // not used + const std::string rname2( rname + ".lz" ); // produced by lzip < 1.20 + const bool lz_exists = ( stat( rname.c_str(), &st ) == 0 ); + // don't modify an existing 'rname.lz' + const bool lz_lz_exists = ( stat( rname2.c_str(), &st ) == 0 ); + if( lz_exists && !force ) + { + show_file_error( rname.c_str(), "Output file already exists, skipping." ); + return -1; + } + + if( format_index == fmt_xz ) + { + if( disable_xz < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_xz = ( std::system( command.c_str() ) != 0 ); + if( disable_xz && verbosity >= 2 ) + show_file_error( compressor_name, + "Xz decompressor not found. Ignoring xz files." 
); + } + if( disable_xz ) return 0; // ignore this file if no xz installed + } + else if( format_index == fmt_zst ) + { + if( disable_zst < 0 ) + { + std::string command( compressor_name ); command += " -V > /dev/null 2>&1"; + disable_zst = ( std::system( command.c_str() ) != 0 ); + if( disable_zst && verbosity >= 2 ) + show_file_error( compressor_name, + "Zstd decompressor not found. Ignoring zstd files." ); + } + if( disable_zst ) return 0; // ignore this file if no zstd installed + } + + if( !lz_exists ) // recompress + { + if( verbosity >= 1 ) + std::fprintf( stderr, "Recompressing file '%s'\n", name.c_str() ); + if( destdir.size() && !make_dirs( rname ) ) + { show_file_error( rname.c_str(), + "Error creating intermediate directory", errno ); return 2; } + int fda[2]; // pipe between decompressor and compressor + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return 2; } + + const pid_t pid = fork(); + if( pid == 0 ) // child1 (decompressor) + { + if( dup2( fda[1], STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+5]; + argv[0] = compressor_name; + for( int i = 0; i < size; ++i ) argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = "-cd"; + argv[size+2] = "--"; + argv[size+3] = name.c_str(); + argv[size+4] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( compressor_name ); + _exit( 1 ); + } + if( pid < 0 ) // parent + { show_fork_error( compressor_name ); return 2; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child2 (lzip compressor) + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 ) + { + const std::vector< std::string > & lzip_args = + get_compressor_args( fmt_lz ); + const int size = lzip_args.size(); // from .conf or --lz + const int size2 = 
lzip_args2.size(); // from command line + const char ** const argv = new const char *[size+size2+5]; + argv[0] = lzip_name; + argv[1] = "-9"; + for( int i = 0; i < size; ++i ) argv[i+2] = lzip_args[i].c_str(); + for( int i = 0; i < size2; ++i ) argv[i+size+2] = lzip_args2[i].c_str(); + argv[size+size2+2] = "-o"; + argv[size+size2+3] = rname.c_str(); + argv[size+size2+4] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( lzip_name ); + _exit( 1 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( lzip_name ); return 2; } + + close( fda[0] ); close( fda[1] ); + const int retval = wait_for_child( pid, compressor_name ); + const int retval2 = wait_for_child( pid2, lzip_name ); + if( retval || retval2 ) + { if( !lz_lz_exists ) std::remove( rname2.c_str() ); // lzip < 1.20 + std::remove( rname.c_str() ); return retval2 ? 2 : 1; } + if( stat( rname.c_str(), &st ) != 0 && + ( lz_lz_exists || stat( rname2.c_str(), &st ) != 0 || + std::rename( rname2.c_str(), rname.c_str() ) != 0 ) ) + { show_file_error( rname.c_str(), "Error renaming output file", errno ); + return 2; } // lzip < 1.11 + set_permissions( rname.c_str(), in_stats ); + } + + { + if( lz_exists && verbosity >= 1 ) + std::fprintf( stderr, "Comparing file '%s'\n", name.c_str() ); + // Quote names in zcmp_command to allow file/dir names with spaces. 
+ std::string zcmp_command( invocation_name ); + unsigned i = zcmp_command.size(); + while( i > 0 && zcmp_command[i-1] != '/' ) --i; // strip "zupdate" + zcmp_command.resize( i ); zcmp_command.insert( zcmp_command.begin(), '\'' ); + zcmp_command += "zcmp' "; // '[dir/]zcmp' + if( no_rcfile ) zcmp_command += "-N "; + if( verbosity < 0 ) zcmp_command += "-q -s "; + zcmp_command += '\''; zcmp_command += name; + zcmp_command += "' '"; zcmp_command += rname; zcmp_command += '\''; + int status = std::system( zcmp_command.c_str() ); + if( status != 0 ) + { if( !lz_exists ) std::remove( rname.c_str() ); + cant_execute( zcmp_command, status ); return 2; } + } + + if( !keep_input_files && std::remove( name.c_str() ) != 0 && errno != ENOENT ) + { show_file_error( name.c_str(), "Can't delete input file", errno ); + return 1; } + return 0; + } + +} // end namespace + + +int main( const int argc, const char * const argv[] ) + { + enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt, zst_opt }; + int recursive = 0; // 1 = '-r', 2 = '-R' + std::string destdir; // write recompressed files here + std::vector< std::string > lzip_args2; // args to lzip, maybe empty + bool expand = false; + bool force = false; + bool ignore_errors = false; + bool keep_input_files = false; + bool no_rcfile = false; + program_name = "zupdate"; + invocation_name = ( argc > 0 ) ? 
argv[0] : program_name; + + const Arg_parser::Option options[] = + { + { '0', 0, Arg_parser::no }, + { '1', 0, Arg_parser::no }, + { '2', 0, Arg_parser::no }, + { '3', 0, Arg_parser::no }, + { '4', 0, Arg_parser::no }, + { '5', 0, Arg_parser::no }, + { '6', 0, Arg_parser::no }, + { '7', 0, Arg_parser::no }, + { '8', 0, Arg_parser::no }, + { '9', 0, Arg_parser::no }, + { 'd', "destdir", Arg_parser::yes }, + { 'e', "expand-extensions", Arg_parser::no }, + { 'f', "force", Arg_parser::no }, + { 'h', "help", Arg_parser::no }, + { 'i', "ignore-errors", Arg_parser::no }, + { 'k', "keep", Arg_parser::no }, + { 'l', "lzip-verbose", Arg_parser::no }, + { 'M', "format", Arg_parser::yes }, + { 'N', "no-rcfile", Arg_parser::no }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "recursive", Arg_parser::no }, + { 'R', "dereference-recursive", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { bz2_opt, "bz2", Arg_parser::yes }, + { gz_opt, "gz", Arg_parser::yes }, + { lz_opt, "lz", Arg_parser::yes }, + { xz_opt, "xz", Arg_parser::yes }, + { zst_opt, "zst", Arg_parser::yes }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options ); + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 2; } + + maybe_process_config_file( parser ); + + int argind = 0; + for( ; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) break; // no more options + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & arg = parser.argument( argind ); + switch( code ) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + lzip_args2.push_back( "-" ); lzip_args2.back() += code; break; + case 'd': destdir = arg; break; + case 'e': expand = true; break; + case 'f': force = true; break; + case 'h': show_help(); return 0; + case 'i': ignore_errors = true; break; + case 
'k': keep_input_files = true; break; + case 'l': lzip_args2.push_back( "-v" ); break; + case 'M': parse_format_list( arg, pn ); break; + case 'N': no_rcfile = true; break; + case 'q': verbosity = -1; lzip_args2.push_back( "-q" ); break; + case 'r': recursive = 1; break; + case 'R': recursive = 2; break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + case bz2_opt: parse_compressor( arg, pn, fmt_bz2, 1 ); break; + case gz_opt: parse_compressor( arg, pn, fmt_gz, 1 ); break; + case lz_opt: parse_compressor( arg, pn, fmt_lz, 1 ); break; + case xz_opt: parse_compressor( arg, pn, fmt_xz, 1 ); break; + case zst_opt: parse_compressor( arg, pn, fmt_zst, 1 ); break; + default: internal_error( "uncaught option." ); + } + } // end process options + +#if defined __MSVCRT__ || defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + const char * const lzip_name = get_compressor_name( fmt_lz ); + if( !lzip_name ) + { show_error( "Missing name of compressor for lzip format." ); return 2; } + + std::list< std::string > filenames; + if( argind < parser.arguments() ) + filenames.push_back( parser.argument( argind++ ) ); // first argument + else if( recursive ) filenames.push_back( "." 
); + else return 0; // nothing to do + + std::string input_filename; + int retval = 0; + bool error = false; + while( true ) + { + std::string srcdir; // dirname to be replaced by destdir + if( destdir.size() ) extract_srcdir_name( filenames.front(), srcdir ); + while( next_filename( filenames, input_filename, error, recursive, true ) ) + { + int tmp = zupdate_file( input_filename, lzip_name, lzip_args2, srcdir, + destdir, expand, force, keep_input_files, no_rcfile ); + if( tmp < 0 ) error = true; // file skipped + if( tmp > retval ) retval = tmp; + if( tmp >= 2 || ( tmp == 1 && !ignore_errors ) ) goto out; + } + if( argind >= parser.arguments() ) break; + filenames.push_back( parser.argument( argind++ ) ); + } +out: + if( error && retval == 0 ) retval = 1; + return retval; + } diff --git a/zutils.cc b/zutils.cc new file mode 100644 index 0000000..2509d10 --- /dev/null +++ b/zutils.cc @@ -0,0 +1,292 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> +#include <vector> +#include <stdint.h> +#include <unistd.h> +#include <sys/wait.h> + +#include "rc.h" +#include "zutils.h" + + +namespace { + +inline bool isvalid_ds( const uint8_t ds ) // lzip valid dictionary_size + { + enum { min_dictionary_size = 1 << 12, + max_dictionary_size = 1 << 29 }; + unsigned dictionary_size = ( 1 << ( ds & 0x1F ) ); + if( dictionary_size > min_dictionary_size ) + dictionary_size -= ( dictionary_size / 16 ) * ( ( ds >> 5 ) & 7 ); + return ( dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ); + } + + +/* Return -1 if child not terminated, 2 in case of error, or exit status of + child process 'pid'. Return 0 if child was terminated by SIGPIPE. +*/ +int child_status( const pid_t pid, const char * const name ) + { + int status; + while( true ) + { + const int tmp = waitpid( pid, &status, WNOHANG ); + if( tmp == -1 && errno != EINTR ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Error checking status of '%s': %s\n", + program_name, name, std::strerror( errno ) ); + _exit( 2 ); + } + if( tmp == 0 ) return -1; // child not terminated + if( tmp == pid ) break; // child terminated + } + if( WIFEXITED( status ) ) return WEXITSTATUS( status ); + if( WIFSIGNALED( status ) && WTERMSIG( status ) == SIGPIPE ) return 0; + return 2; + } + +} // end namespace + + +/* Return the number of bytes really read. + If (value returned < size) and (errno == 0), means EOF was reached. +*/ +int readblock( const int fd, uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = read( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n == 0 ) break; // EOF + else if( errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +/* Return the number of bytes really written. 
+ If (value returned < size), it is always an error. +*/ +int writeblock( const int fd, const uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = write( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n < 0 && errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +// filename == "-" means stdin. +// +bool feed_data( const std::string & filename, const int infd, const int outfd, + const uint8_t * magic_data, const int magic_size ) + { + if( magic_size && writeblock( outfd, magic_data, magic_size ) != magic_size ) + { show_error( "Write error", errno ); return false; } + enum { buffer_size = 4096 }; + uint8_t buffer[buffer_size]; + while( true ) + { + const int size = readblock( infd, buffer, buffer_size ); + if( size != buffer_size && errno ) + { show_file_error( name_or_stdin( filename.c_str() ), "Read error", + errno ); return false; } + if( size > 0 && writeblock( outfd, buffer, size ) != size ) + { show_error( "Write error", errno ); return false; } + if( size < buffer_size ) break; + } + return true; + } + + +bool good_status( const Children & children, const bool finished ) + { + bool error = false; + for( int i = 0; i < 2; ++i ) + { + const pid_t pid = children.pid[i]; + if( pid ) + { + const char * const name = + ( i == 0 ) ? 
"data feeder" : children.compressor_name; + // even if compressor finished, trailing data may remain in data feeder + if( i == 0 || !finished ) + { + const int tmp = child_status( pid, name ); // 0 if SIGPIPE + if( tmp < 0 ) // child not terminated + { kill( pid, SIGTERM ); wait_for_child( pid, name ); } + else if( tmp != 0 ) error = true; // child status != 0 + } + else + if( wait_for_child( pid, name ) != 0 ) error = true; + } + } + return !error; + } + + +bool set_data_feeder( const std::string & filename, int * const infdp, + Children & children, int format_index ) + { + uint8_t magic_data[magic_buf_size]; + int magic_size = 0; + if( format_index < 0 ) + format_index = test_format( *infdp, magic_data, &magic_size ); + children.compressor_name = get_compressor_name( format_index ); + + if( children.compressor_name ) // compressed + { + int fda[2]; // pipe from feeder + int fda2[2]; // pipe from compressor + if( pipe( fda ) < 0 || pipe( fda2 ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const int old_infd = *infdp; + *infdp = fda2[0]; + const pid_t pid = fork(); + if( pid == 0 ) // child 1 (compressor feeder) + { + if( close( fda[0] ) != 0 || + close( fda2[0] ) != 0 || close( fda2[1] ) != 0 || + !feed_data( filename, old_infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + + const pid_t pid2 = fork(); + if( pid2 == 0 ) // child 2 (compressor) + { + if( dup2( fda[0], STDIN_FILENO ) >= 0 && + dup2( fda2[1], STDOUT_FILENO ) >= 0 && + close( fda[0] ) == 0 && close( fda[1] ) == 0 && + close( fda2[0] ) == 0 && close( fda2[1] ) == 0 ) + { + const std::vector< std::string > & compressor_args = + get_compressor_args( format_index ); + const int size = compressor_args.size(); + const char ** const argv = new const char *[size+3]; + argv[0] = children.compressor_name; + for( int i = 
0; i < size; ++i ) + argv[i+1] = compressor_args[i].c_str(); + argv[size+1] = ( verbosity >= 0 ) ? "-d" : "-dq"; + argv[size+2] = 0; + execvp( argv[0], (char **)argv ); + } + show_exec_error( children.compressor_name ); + _exit( 2 ); + } + if( pid2 < 0 ) // parent + { show_fork_error( children.compressor_name ); return false; } + + close( fda[0] ); close( fda[1] ); close( fda2[1] ); + children.pid[0] = pid; + children.pid[1] = pid2; + } + else // uncompressed + { + int fda[2]; // pipe from feeder + if( pipe( fda ) < 0 ) + { show_error( "Can't create pipe", errno ); return false; } + const int old_infd = *infdp; + *infdp = fda[0]; + const pid_t pid = fork(); + if( pid == 0 ) // child (feeder) + { + if( close( fda[0] ) != 0 || + !feed_data( filename, old_infd, fda[1], magic_data, magic_size ) ) + _exit( 2 ); + if( close( fda[1] ) != 0 ) { show_close_error(); _exit( 2 ); } + _exit( 0 ); + } + if( pid < 0 ) // parent + { show_fork_error( "data feeder" ); return false; } + close( fda[1] ); + children.pid[0] = pid; + children.pid[1] = 0; + } + return true; + } + + +// Return format_index, or -1 if uncompressed or read error. 
+// +int test_format( const int infd, uint8_t magic_data[], + int * const magic_sizep ) + { + enum { bzip2_magic_size = 3, + gzip_magic_size = 2, + lzip_magic_size = 5, + xz_magic_size = 5, + zstd_magic_size = 4, + compress_magic_size = 2 }; + const uint8_t bzip2_magic[bzip2_magic_size] = + { 0x42, 0x5A, 0x68 }; // "BZh" + const uint8_t gzip_magic[gzip_magic_size] = + { 0x1F, 0x8B }; + const uint8_t compress_magic[compress_magic_size] = + { 0x1F, 0x9D }; + const uint8_t lzip_magic[lzip_magic_size] = + { 0x4C, 0x5A, 0x49, 0x50, 0x01 }; // "LZIP\001" + const uint8_t xz_magic[xz_magic_size] = + { 0xFD, 0x37, 0x7A, 0x58, 0x5A }; // 0xFD, "7zXZ" + const uint8_t zstd_magic[zstd_magic_size] = + { 0x28, 0xB5, 0x2F, 0xFD }; // 0xFD2FB528 LE + + *magic_sizep = readblock( infd, magic_data, magic_buf_size ); + if( *magic_sizep < magic_buf_size ) + { if( errno ) return -1; // read error + for( int i = *magic_sizep; i < magic_buf_size; ++i ) magic_data[i] = 0; } + // test formats in search order + if( std::memcmp( magic_data, lzip_magic, lzip_magic_size ) == 0 && + isvalid_ds( magic_data[lzip_magic_size] ) ) + return fmt_lz; + if( std::memcmp( magic_data, bzip2_magic, bzip2_magic_size ) == 0 && + magic_data[3] >= '1' && magic_data[3] <= '9' && + ( std::memcmp( magic_data + 4, "1AY&SY", 6 ) == 0 || + std::memcmp( magic_data + 4, "\x17rE8P\x90", 6 ) == 0 ) ) + return fmt_bz2; + if( std::memcmp( magic_data, gzip_magic, gzip_magic_size ) == 0 || + std::memcmp( magic_data, compress_magic, compress_magic_size ) == 0 ) + return fmt_gz; + if( std::memcmp( magic_data, zstd_magic, zstd_magic_size ) == 0 ) + return fmt_zst; + if( std::memcmp( magic_data, xz_magic, xz_magic_size ) == 0 ) + return fmt_xz; + return -1; + } diff --git a/zutils.conf b/zutils.conf new file mode 100644 index 0000000..3dc4ef5 --- /dev/null +++ b/zutils.conf @@ -0,0 +1,18 @@ +# +# Runtime Configuration file for Zutils +# +# Zutils looks for this file in: +# 1 - $XDG_CONFIG_HOME/zutils.conf +# 2 - 
${sysconfdir}/zutils.conf +# XDG_CONFIG_HOME defaults to $HOME/.config + +# This file sets the compressor and options to be used for each format. +# The command-line options override compressors specified in this file. +# Syntax: <format> = <compressor> [options] +# Uncomment each line you want to take effect. + +# bz2 = lbzip2 -n2 +# gz = pigz -p2 +# lz = plzip -n2 +# xz = pixz -p2 +# zst = zstd -T2 diff --git a/zutils.h b/zutils.h new file mode 100644 index 0000000..270ae57 --- /dev/null +++ b/zutils.h @@ -0,0 +1,40 @@ +/* Zutils - Utilities dealing with compressed files + Copyright (C) 2009-2024 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +inline const char * name_or_stdin( const char * const name ) + { return ( name[0] == '-' && name[1] == 0 ) ? 
"(stdin)" : name; } + +int readblock( const int fd, uint8_t * const buf, const int size ); +int writeblock( const int fd, const uint8_t * const buf, const int size ); +bool feed_data( const std::string & filename, const int infd, const int outfd, + const uint8_t * magic_data, const int magic_size ); + +struct Children + { + const char * compressor_name; + pid_t pid[2]; // data feeder, compressor + }; +bool good_status( const Children & children, const bool finished ); +bool set_data_feeder( const std::string & filename, int * const infdp, + Children & children, int format_index ); + +enum { magic_buf_size = 10 }; // >= longest extended magic (bzip2) + +// Return format_index, or -1 if uncompressed. +// +int test_format( const int infd, uint8_t magic_data[], + int * const magic_sizep ); |