diff options
-rw-r--r-- | AUTHORS | 1 | ||||
-rw-r--r-- | COPYING | 338 | ||||
-rw-r--r-- | ChangeLog | 215 | ||||
-rw-r--r-- | INSTALL | 80 | ||||
-rw-r--r-- | Makefile.in | 174 | ||||
-rw-r--r-- | NEWS | 53 | ||||
-rw-r--r-- | README | 97 | ||||
-rw-r--r-- | archive_reader.cc | 273 | ||||
-rw-r--r-- | archive_reader.h | 120 | ||||
-rw-r--r-- | arg_parser.cc | 197 | ||||
-rw-r--r-- | arg_parser.h | 110 | ||||
-rw-r--r-- | common.cc | 151 | ||||
-rw-r--r-- | common_decode.cc | 243 | ||||
-rw-r--r-- | compress.cc | 375 | ||||
-rwxr-xr-x | configure | 200 | ||||
-rw-r--r-- | create.cc | 788 | ||||
-rw-r--r-- | create.h | 48 | ||||
-rw-r--r-- | create_lz.cc | 600 | ||||
-rw-r--r-- | decode.cc | 492 | ||||
-rw-r--r-- | decode.h | 32 | ||||
-rw-r--r-- | decode_lz.cc | 763 | ||||
-rw-r--r-- | delete.cc | 190 | ||||
-rw-r--r-- | delete_lz.cc | 139 | ||||
-rw-r--r-- | doc/tarlz.1 | 177 | ||||
-rw-r--r-- | doc/tarlz.info | 1272 | ||||
-rw-r--r-- | doc/tarlz.texi | 1338 | ||||
-rw-r--r-- | exclude.cc | 54 | ||||
-rw-r--r-- | extended.cc | 415 | ||||
-rw-r--r-- | lzip_index.cc | 221 | ||||
-rw-r--r-- | lzip_index.h | 93 | ||||
-rw-r--r-- | main.cc | 723 | ||||
-rw-r--r-- | tarlz.h | 609 | ||||
-rwxr-xr-x | testsuite/check.sh | 1417 | ||||
-rw-r--r-- | testsuite/dotdot1.tar.lz | bin | 0 -> 139 bytes | |||
-rw-r--r-- | testsuite/dotdot2.tar.lz | bin | 0 -> 140 bytes | |||
-rw-r--r-- | testsuite/dotdot3.tar.lz | bin | 0 -> 141 bytes | |||
-rw-r--r-- | testsuite/dotdot4.tar.lz | bin | 0 -> 140 bytes | |||
-rw-r--r-- | testsuite/dotdot5.tar.lz | bin | 0 -> 139 bytes | |||
-rw-r--r-- | testsuite/eoa_blocks.tar | bin | 0 -> 1024 bytes | |||
-rw-r--r-- | testsuite/eoa_blocks.tar.lz | bin | 0 -> 44 bytes | |||
-rw-r--r-- | testsuite/rbar | 1 | ||||
-rw-r--r-- | testsuite/rbaz | 1 | ||||
-rw-r--r-- | testsuite/rfoo | 1 | ||||
-rw-r--r-- | testsuite/t155.tar | bin | 0 -> 9216 bytes | |||
-rw-r--r-- | testsuite/t155.tar.lz | bin | 0 -> 906 bytes | |||
-rw-r--r-- | testsuite/t155_fv1.tar | bin | 0 -> 10240 bytes | |||
-rw-r--r-- | testsuite/t155_fv1.tar.lz | bin | 0 -> 914 bytes | |||
-rw-r--r-- | testsuite/t155_fv2.tar | bin | 0 -> 10240 bytes | |||
-rw-r--r-- | testsuite/t155_fv2.tar.lz | bin | 0 -> 1042 bytes | |||
-rw-r--r-- | testsuite/t155_fv3.tar | bin | 0 -> 10240 bytes | |||
-rw-r--r-- | testsuite/t155_fv3.tar.lz | bin | 0 -> 915 bytes | |||
-rw-r--r-- | testsuite/t155_fv4.tar.lz | bin | 0 -> 1031 bytes | |||
-rw-r--r-- | testsuite/t155_fv5.tar.lz | bin | 0 -> 1173 bytes | |||
-rw-r--r-- | testsuite/t155_fv6.tar.lz | bin | 0 -> 1031 bytes | |||
-rw-r--r-- | testsuite/tar_in_tlz1.tar.lz | bin | 0 -> 7680 bytes | |||
-rw-r--r-- | testsuite/tar_in_tlz2.tar.lz | bin | 0 -> 7807 bytes | |||
-rw-r--r-- | testsuite/test.txt | 676 | ||||
-rw-r--r-- | testsuite/test.txt.lz | bin | 0 -> 7392 bytes | |||
-rw-r--r-- | testsuite/test.txt.tar | bin | 0 -> 38400 bytes | |||
-rw-r--r-- | testsuite/test.txt.tar.lz | bin | 0 -> 7495 bytes | |||
-rw-r--r-- | testsuite/test3.tar | bin | 0 -> 4096 bytes | |||
-rw-r--r-- | testsuite/test3.tar.lz | bin | 0 -> 356 bytes | |||
-rw-r--r-- | testsuite/test3_bad1.tar | bin | 0 -> 4096 bytes | |||
-rw-r--r-- | testsuite/test3_bad1.tar.lz | bin | 0 -> 356 bytes | |||
-rw-r--r-- | testsuite/test3_bad2.tar | bin | 0 -> 4096 bytes | |||
-rw-r--r-- | testsuite/test3_bad2.tar.lz | bin | 0 -> 356 bytes | |||
-rw-r--r-- | testsuite/test3_bad3.tar | bin | 0 -> 4096 bytes | |||
-rw-r--r-- | testsuite/test3_bad3.tar.lz | bin | 0 -> 356 bytes | |||
-rw-r--r-- | testsuite/test3_bad4.tar | bin | 0 -> 4096 bytes | |||
-rw-r--r-- | testsuite/test3_bad4.tar.lz | bin | 0 -> 356 bytes | |||
-rw-r--r-- | testsuite/test3_bad5.tar | bin | 0 -> 4608 bytes | |||
-rw-r--r-- | testsuite/test3_bad5.tar.lz | bin | 0 -> 356 bytes | |||
-rw-r--r-- | testsuite/test3_bad6.tar.lz | bin | 0 -> 866 bytes | |||
-rw-r--r-- | testsuite/test3_dir.tar | bin | 0 -> 4096 bytes | |||
-rw-r--r-- | testsuite/test3_dir.tar.lz | bin | 0 -> 358 bytes | |||
-rw-r--r-- | testsuite/test3_dot.tar.lz | bin | 0 -> 1126 bytes | |||
-rw-r--r-- | testsuite/test3_em1.tar.lz | bin | 0 -> 392 bytes | |||
-rw-r--r-- | testsuite/test3_em2.tar.lz | bin | 0 -> 392 bytes | |||
-rw-r--r-- | testsuite/test3_em3.tar.lz | bin | 0 -> 392 bytes | |||
-rw-r--r-- | testsuite/test3_em4.tar.lz | bin | 0 -> 392 bytes | |||
-rw-r--r-- | testsuite/test3_em5.tar.lz | bin | 0 -> 392 bytes | |||
-rw-r--r-- | testsuite/test3_em6.tar.lz | bin | 0 -> 500 bytes | |||
-rw-r--r-- | testsuite/test3_eoa1.tar | bin | 0 -> 3072 bytes | |||
-rw-r--r-- | testsuite/test3_eoa1.tar.lz | bin | 0 -> 312 bytes | |||
-rw-r--r-- | testsuite/test3_eoa2.tar | bin | 0 -> 3584 bytes | |||
-rw-r--r-- | testsuite/test3_eoa2.tar.lz | bin | 0 -> 352 bytes | |||
-rw-r--r-- | testsuite/test3_eoa3.tar | bin | 0 -> 4608 bytes | |||
-rw-r--r-- | testsuite/test3_eoa3.tar.lz | bin | 0 -> 396 bytes | |||
-rw-r--r-- | testsuite/test3_eoa4.tar | bin | 0 -> 4096 bytes | |||
-rw-r--r-- | testsuite/test3_eoa4.tar.lz | bin | 0 -> 535 bytes | |||
-rw-r--r-- | testsuite/test3_eoa5.tar.lz | bin | 0 -> 535 bytes | |||
-rw-r--r-- | testsuite/test3_gh1.tar | bin | 0 -> 5120 bytes | |||
-rw-r--r-- | testsuite/test3_gh1.tar.lz | bin | 0 -> 574 bytes | |||
-rw-r--r-- | testsuite/test3_gh2.tar | bin | 0 -> 5120 bytes | |||
-rw-r--r-- | testsuite/test3_gh2.tar.lz | bin | 0 -> 607 bytes | |||
-rw-r--r-- | testsuite/test3_gh3.tar | bin | 0 -> 5120 bytes | |||
-rw-r--r-- | testsuite/test3_gh3.tar.lz | bin | 0 -> 645 bytes | |||
-rw-r--r-- | testsuite/test3_gh4.tar | bin | 0 -> 5120 bytes | |||
-rw-r--r-- | testsuite/test3_gh4.tar.lz | bin | 0 -> 795 bytes | |||
-rw-r--r-- | testsuite/test3_gh5.tar.lz | bin | 0 -> 574 bytes | |||
-rw-r--r-- | testsuite/test3_gh6.tar.lz | bin | 0 -> 521 bytes | |||
-rw-r--r-- | testsuite/test3_nn.tar | bin | 0 -> 4096 bytes | |||
-rw-r--r-- | testsuite/test3_nn.tar.lz | bin | 0 -> 350 bytes | |||
-rw-r--r-- | testsuite/test3_sm1.tar.lz | bin | 0 -> 579 bytes | |||
-rw-r--r-- | testsuite/test3_sm2.tar.lz | bin | 0 -> 612 bytes | |||
-rw-r--r-- | testsuite/test3_sm3.tar.lz | bin | 0 -> 650 bytes | |||
-rw-r--r-- | testsuite/test3_sm4.tar.lz | bin | 0 -> 798 bytes | |||
-rw-r--r-- | testsuite/test_bad1.txt | 307 | ||||
-rw-r--r-- | testsuite/test_bad1.txt.tar | bin | 0 -> 17014 bytes | |||
-rw-r--r-- | testsuite/test_bad1.txt.tar.lz | bin | 0 -> 6000 bytes | |||
-rw-r--r-- | testsuite/test_bad2.txt | 320 | ||||
-rw-r--r-- | testsuite/test_bad2.txt.tar.lz | bin | 0 -> 7495 bytes | |||
-rw-r--r-- | testsuite/tlz_in_tar1.tar | bin | 0 -> 2048 bytes | |||
-rw-r--r-- | testsuite/tlz_in_tar2.tar | bin | 0 -> 3072 bytes | |||
-rw-r--r-- | testsuite/ts_in_link.tar.lz | bin | 0 -> 509 bytes | |||
-rw-r--r-- | testsuite/ug32767.tar.lz | bin | 0 -> 136 bytes | |||
-rw-r--r-- | testsuite/ug32chars.tar.lz | bin | 0 -> 176 bytes |
117 files changed, 13304 insertions, 0 deletions
@@ -0,0 +1 @@ +Tarlz was written by Antonio Diaz Diaz. @@ -0,0 +1,338 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) <year> <name of author> + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. 
If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..deba87a --- /dev/null +++ b/ChangeLog @@ -0,0 +1,215 @@ +2022-09-23 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.23 released. + * Create and decode the extended records 'atime' and 'mtime'. + * Create and decode the extended records 'uid' and 'gid'. + * New option '--ignore-overflow'. + * Refuse to read/write archive data from/to a terminal. + (Reported by DustDFG). + * main.cc (parse_mtime): Make time of day 'HH:MM:SS' optional. + Accept both space and 'T' as separator between date and time. + (show_option_error): New function showing argument and option name. + * decode.cc (extract_member): Diagnose intermediate directory failure. + Failure to extract a member is no longer fatal. + * decode_lz.cc: Make diagnostics identical to serial decoder. + * common_decode.cc (format_member_name): Improve column alignment. + * create.cc (fill_headers): Improve diagnostic when stat reports a + wrong st_size for a symbolic link. (Reported by Jason Lenz). + Change diagnostic "File is the archive" to "Archive can't contain + itself" following a similar change made by Paul Eggert to GNU tar. + * Don't show "Removing leading '/' from member names." if excluded. + * tarlz.texi: Change GNU Texinfo category from 'Data Compression' + to 'Archiving' to match that of GNU tar. + Use 'end-of-archive' (EOA) instead of 'end-of-file' (EOF). + * main.cc (show_help), tarlz.texi: List operations before options. + * Many small improvements have been made to code and documentation. + +2022-01-05 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.22 released. + * main.cc (getnum): Show option name and valid range if error. 
+ (check_lib): Check that LZ_API_VERSION and LZ_version_string match. + (main): Report an error if -o is used with any operation except -z. + * Set variable LIBS from configure. + +2021-06-14 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.21 released. + * Lzlib 1.12 or newer is now required. + * decode.cc (decode): Skip members without name except when listing. + decode_lz.cc (dworker): Likewise. (Reported by Florian Schmaus). + * New options '-z, --compress' and '-o, --output'. + * New option '--warn-newer'. + * tarlz.texi (Portable character set): Link to moe section on Unicode. + (Invoking tarlz): Document concatenation to standard output. + * check.sh: Fix the '--diff' test on OS/2. + +2021-01-08 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.19 released. + * extended.cc: Print a diagnostic for each unknown keyword found. + * tarlz.h: Add a missing '#include <sys/types.h>'. + +2020-11-21 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.18 released. + * main.cc: New option '--check-lib'. + * Implement multi-threaded '-x, --extract'. + * Don't #include <sys/sysmacros.h> when compiling on OS2. + * delete.cc, delete_lz.cc: Use Archive_reader. + * extract.cc: Rename to decode.cc. + * tarlz.texi: New section 'Limitations of multi-threaded extraction'. + +2020-07-30 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.17 released. + * New option '--mtime'. + * New option '-p, --preserve-permissions'. + * Implement multi-threaded '-d, --diff'. + * list_lz.cc: Rename to decode_lz.cc. + * main.cc (main): Report an error if a file name is empty or if the + archive is specified more than once. + * lzip_index.cc: Improve messages for corruption in last header. + * Don't #include <sys/sysmacros.h> when compiling on BSD. + * tarlz.texi: New chapter 'Internal structure of tarlz'. + +2019-10-08 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.16 released. + * extract.cc (extract_member): Fix call order of chown, chmod. 
+ * delete_lz.cc (delete_members_lz): Return 2 if collective member. + * main.cc: Set a valid invocation_name even if argc == 0. + * #include <sys/sysmacros.h> unconditionally. + * tarlz.texi: New chapter 'Portable character set'. + +2019-04-11 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.15 released. + * New option '--delete' (from uncompressed and --no-solid archives). + * list_lz.cc: Fix MT listing of archives with format violations. + +2019-03-12 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.14 released. + * New option '--exclude'. + * New option '-h, --dereference'. + * Short option name '-h' no longer means '--help'. + * create.cc: Implement '-A, --concatenate' and '-r, --append' to + uncompressed archives and to standard output. + * main.cc: Port option '--out-slots' from plzip. + +2019-02-27 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.13 released. + * create_lz.cc (cworker): Fix skipping of unreadable files. + * list_lz.cc: Fix listing of archives containing empty lzip members. + * create.cc (fill_headers): Store negative mtime as zero. + +2019-02-22 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.12 released. + * create.cc (fill_headers): Fix use of st_rdev instead of st_dev. + * Save just numerical uid/gid if user or group not in database. + * extract.cc (format_member_name): Print devmajor and devminor. + * New option '-d, --diff'. + * New option '--ignore-ids'. + * extract.cc: Fast '-t, --list' on seekable uncompressed archives. + +2019-02-13 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.11 released. + * extract.cc (archive_read): Fix endless loop with empty lz file. + * Implement multi-threaded '-c, --create' and '-r, --append'. + * '--bsolid' is now the default compression granularity. + * create.cc (remove_leading_dotslash): Remember more than one prefix. + * tarlz.texi: New chapter 'Minimum archive sizes'. + +2019-01-31 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.10 released. + * New option '--bsolid'. 
+ * New option '-B, --data-size'. + * create.cc: Set ustar name to zero if extended header is used. + +2019-01-22 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.9 released. + * Implement multi-threaded '-t, --list'. + * New option '-n, --threads'. + * Recognize global pax headers. Ignore them for now. + * strtoul has been replaced with length-safe parsers. + * tarlz.texi: New chapter 'Limitations of parallel tar decoding'. + +2018-12-16 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.8 released. + * New option '--anonymous' (--owner=root --group=root). + * extract.cc (decode): 'tarlz -xf foo ./bar' now extracts 'bar'. + * create.cc: Set to zero most fields in extended headers. + * tarlz.texi: New chapter 'Amendments to pax format'. + +2018-11-23 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.7 released. + * New option '--keep-damaged'. + * New option '--no-solid'. + * create.cc (archive_write): Minimize dictionary size. + * create.cc: Detect and skip archive in '-A', '-c', and '-r'. + * main.cc (show_version): Show the version of lzlib being used. + +2018-10-19 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.6 released. + * New option '-A, --concatenate'. + * Option '--ignore-crc' replaced with '--missing-crc'. + * create.cc (add_member): Verify that uid, gid, mtime, devmajor, + and devminor are in ustar range. + * configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'. + * Makefile.in: Use tarlz in target 'dist'. + +2018-09-29 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.5 released. + * Implement simplified POSIX pax format. + * Implement CRC32-C (Castagnoli) of the extended header data. + * New option '--ignore-crc'. + * Add missing #includes for major, minor and makedev. + * tarlz.texi: Document the new archive format. + +2018-04-23 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.4 released. + * Add some missing #includes. + * main.cc: Open files in binary mode on OS2. 
+ +2018-03-19 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.3 released. + * Rename project to 'tarlz' from 'pmtar' (Poor Man's Tar). + * New option '-C, --directory'. + * Implement lzip compression of members at archive creation. + * New option '-r, --append'. + * New options '--owner' and '--group'. + * New options '--asolid', '--dsolid', and '--solid'. + * Implement file appending to compressed archive. + * Implement transparent decompression of the archive. + * Implement skipping over damaged (un)compressed members. + * Implement recursive extraction/listing of directories. + * Implement verbose extract/list output. + * tarlz.texi: New file. + +2014-01-22 Antonio Diaz Diaz <antonio@gnu.org> + + * Version 0.2 released. + * configure: Options now accept a separate argument. + +2013-02-16 Antonio Diaz Diaz <ant_diaz@teleline.es> + + * Version 0.1 released. + + +Copyright (C) 2013-2022 Antonio Diaz Diaz. + +This file is a collection of facts, and thus it is not copyrightable, +but just in case, you have unlimited permission to copy, distribute, and +modify it. @@ -0,0 +1,80 @@ +Requirements +------------ +You will need a C++98 compiler with support for 'long long', and the +compression library lzlib installed. (gcc 3.3.6 or newer is recommended). +I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards +compliant compiler. + +Lzlib must be version 1.12 or newer. + +Gcc is available at http://gcc.gnu.org. +Lzlib is available at http://www.nongnu.org/lzip/lzlib.html. + +The operating system must allow signal handlers read access to objects with +static storage duration so that the cleanup handler for Control-C can delete +the partial output file in '-z, --compress' mode. + + +Procedure +--------- +1. Unpack the archive if you have not done so already: + + tar -xf tarlz[version].tar.lz +or + lzip -cd tarlz[version].tar.lz | tar -xf - + +This creates the directory ./tarlz[version] containing the source from +the main archive. + +2. 
Change to tarlz directory and run configure. + (Try 'configure --help' for usage instructions). + + cd tarlz[version] + ./configure + + To link against a lzlib not installed in a standard place, use: + + ./configure CPPFLAGS='-I <includedir>' LDFLAGS='-L <libdir>' + + (Replace <includedir> with the directory containing the file lzlib.h, + and <libdir> with the directory containing the file liblz.a). + +3. Run make. + + make + +4. Optionally, type 'make check' to run the tests that come with tarlz. + +5. Type 'make install' to install the program and any data files and + documentation. + + Or type 'make install-compress', which additionally compresses the + info manual and the man page after installation. + (Installing compressed docs may become the default in the future). + + You can install only the program, the info manual, or the man page by + typing 'make install-bin', 'make install-info', or 'make install-man' + respectively. + + +Another way +----------- +You can also compile tarlz into a separate directory. +To do this, you must use a version of 'make' that supports the variable +'VPATH', such as GNU 'make'. 'cd' to the directory where you want the +object files and executables to go and run the 'configure' script. +'configure' automatically checks for the source code in '.', in '..', and +in the directory that 'configure' is in. + +'configure' recognizes the option '--srcdir=DIR' to control where to +look for the sources. Usually 'configure' can determine that directory +automatically. + +After running 'configure', you can run 'make' and 'make install' as +explained above. + + +Copyright (C) 2013-2022 Antonio Diaz Diaz. + +This file is free documentation: you have unlimited permission to copy, +distribute, and modify it. 
diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..0fa5761 --- /dev/null +++ b/Makefile.in @@ -0,0 +1,174 @@ + +DISTNAME = $(pkgname)-$(pkgversion) +INSTALL = install +INSTALL_PROGRAM = $(INSTALL) -m 755 +INSTALL_DATA = $(INSTALL) -m 644 +INSTALL_DIR = $(INSTALL) -d -m 755 +SHELL = /bin/sh +CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 + +objs = arg_parser.o lzip_index.o archive_reader.o common.o common_decode.o \ + compress.o create.o create_lz.o decode.o decode_lz.o delete.o \ + delete_lz.o exclude.o extended.o main.o + + +.PHONY : all install install-bin install-info install-man \ + install-strip install-compress install-strip-compress \ + install-bin-strip install-info-compress install-man-compress \ + uninstall uninstall-bin uninstall-info uninstall-man \ + doc info man check dist clean distclean + +all : $(progname) + +$(progname) : $(objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) $(LIBS) + +main.o : main.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< + +%.o : %.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< + +$(objs) : Makefile +arg_parser.o : arg_parser.h +archive_reader.o : tarlz.h lzip_index.h archive_reader.h +common.o : tarlz.h arg_parser.h +common_decode.o : tarlz.h arg_parser.h +compress.o : tarlz.h arg_parser.h +create.o : tarlz.h arg_parser.h create.h +create_lz.o : tarlz.h arg_parser.h create.h +decode.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h decode.h +decode_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h decode.h +delete.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h +delete_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h +exclude.o : tarlz.h +extended.o : tarlz.h +lzip_index.o : tarlz.h lzip_index.h +main.o : tarlz.h arg_parser.h + + +doc : info man + +info : $(VPATH)/doc/$(pkgname).info + +$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi + cd $(VPATH)/doc && makeinfo $(pkgname).texi + +man : 
$(VPATH)/doc/$(progname).1 + +$(VPATH)/doc/$(progname).1 : $(progname) + help2man -n 'creates tar archives with multimember lzip compression' \ + -o $@ ./$(progname) + +Makefile : $(VPATH)/configure $(VPATH)/Makefile.in + ./config.status + +check : all + @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion) + +install : install-bin install-info install-man +install-strip : install-bin-strip install-info install-man +install-compress : install-bin install-info-compress install-man-compress +install-strip-compress : install-bin-strip install-info-compress install-man-compress + +install-bin : all + if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi + $(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)" + +install-bin-strip : all + $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin + +install-info : + if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* + $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi + +install-info-compress : install-info + lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info" + +install-man : + if [ ! 
-d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi + -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"* + $(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1" + +install-man-compress : install-man + lzip -v -9 "$(DESTDIR)$(mandir)/man1/$(progname).1" + +uninstall : uninstall-man uninstall-info uninstall-bin + +uninstall-bin : + -rm -f "$(DESTDIR)$(bindir)/$(progname)" + +uninstall-info : + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* + +uninstall-man : + -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"* + +dist : doc + ln -sf $(VPATH) $(DISTNAME) + tarlz --solid --anonymous -9cvf $(DISTNAME).tar.lz \ + $(DISTNAME)/AUTHORS \ + $(DISTNAME)/COPYING \ + $(DISTNAME)/ChangeLog \ + $(DISTNAME)/INSTALL \ + $(DISTNAME)/Makefile.in \ + $(DISTNAME)/NEWS \ + $(DISTNAME)/README \ + $(DISTNAME)/configure \ + $(DISTNAME)/doc/$(progname).1 \ + $(DISTNAME)/doc/$(pkgname).info \ + $(DISTNAME)/doc/$(pkgname).texi \ + $(DISTNAME)/*.h \ + $(DISTNAME)/*.cc \ + $(DISTNAME)/testsuite/check.sh \ + $(DISTNAME)/testsuite/test.txt \ + $(DISTNAME)/testsuite/test.txt.tar \ + $(DISTNAME)/testsuite/test_bad1.txt.tar \ + $(DISTNAME)/testsuite/test_bad[12].txt \ + $(DISTNAME)/testsuite/rfoo \ + $(DISTNAME)/testsuite/rbar \ + $(DISTNAME)/testsuite/rbaz \ + $(DISTNAME)/testsuite/test3.tar \ + $(DISTNAME)/testsuite/test3_nn.tar \ + $(DISTNAME)/testsuite/test3_eoa[1-4].tar \ + $(DISTNAME)/testsuite/test3_gh[1-4].tar \ + $(DISTNAME)/testsuite/test3_bad[1-5].tar \ + $(DISTNAME)/testsuite/test3_dir.tar \ + $(DISTNAME)/testsuite/t155.tar \ + $(DISTNAME)/testsuite/t155_fv[1-3].tar \ + $(DISTNAME)/testsuite/eoa_blocks.tar \ + $(DISTNAME)/testsuite/test.txt.lz \ + $(DISTNAME)/testsuite/test.txt.tar.lz \ + $(DISTNAME)/testsuite/test_bad[12].txt.tar.lz \ + $(DISTNAME)/testsuite/test3.tar.lz \ + 
$(DISTNAME)/testsuite/test3_eoa[1-5].tar.lz \ + $(DISTNAME)/testsuite/test3_em[1-6].tar.lz \ + $(DISTNAME)/testsuite/test3_gh[1-6].tar.lz \ + $(DISTNAME)/testsuite/test3_nn.tar.lz \ + $(DISTNAME)/testsuite/test3_sm[1-4].tar.lz \ + $(DISTNAME)/testsuite/test3_bad[1-6].tar.lz \ + $(DISTNAME)/testsuite/test3_dir.tar.lz \ + $(DISTNAME)/testsuite/test3_dot.tar.lz \ + $(DISTNAME)/testsuite/tar_in_tlz[12].tar.lz \ + $(DISTNAME)/testsuite/tlz_in_tar[12].tar \ + $(DISTNAME)/testsuite/ts_in_link.tar.lz \ + $(DISTNAME)/testsuite/t155.tar.lz \ + $(DISTNAME)/testsuite/t155_fv[1-6].tar.lz \ + $(DISTNAME)/testsuite/dotdot[1-5].tar.lz \ + $(DISTNAME)/testsuite/ug32767.tar.lz \ + $(DISTNAME)/testsuite/ug32chars.tar.lz \ + $(DISTNAME)/testsuite/eoa_blocks.tar.lz + rm -f $(DISTNAME) + +clean : + -rm -f $(progname) $(objs) + +distclean : clean + -rm -f Makefile config.status *.tar *.tar.lz @@ -0,0 +1,53 @@ +Changes in version 0.23: + +Tarlz now can create and decode the extended records 'atime' and 'mtime', +allowing times beyond the ustar range (before 1970-01-01 00:00:00 UTC or +after 2242-03-16 12:56:31 UTC). + +Tarlz now can create and decode the extended records 'uid' and 'gid', +allowing user and group IDs beyond the ustar limit of 2_097_151. + +The new option '--ignore-overflow', which makes '-d, --diff' ignore +differences in mtime caused by overflow on 32-bit systems, has been added. + +Tarlz now refuses to read archive data from a terminal or write archive data +to a terminal. (Reported by DustDFG). + +In the date format of option '--mtime' the time of day 'HH:MM:SS' is now +optional and defaults to '00:00:00'. Both space and 'T' are now accepted as +separator between date and time. + +Diagnostics caused by invalid arguments to command line options now show the +argument and the name of the option. + +Tarlz now diagnoses separately the failure to create an intermediate +directory during extraction. 
+ +Failure to extract a member due to environmental problems is no longer fatal +in serial extraction. (It was already non-fatal in parallel extraction). + +The diagnostics emitted by the parallel decoder should now be identical to +the corresponding diagnostics of the serial decoder. + +Column alignment has been improved in listings by printing "user/group size" +in a field of minimum width 19 with at least 8 characters for size. + +The diagnostic shown when the filesystem reports a wrong st_size for a +symbolic link has been improved. (Reported by Jason Lenz). + +The diagnostic "File is the archive" has been changed to "Archive can't +contain itself" following a similar change made by Paul Eggert to GNU tar. + +The warning "Removing leading '/' from member names." is now not shown when +compressing nor if the member causing it is excluded. + +The texinfo category of the manual has been changed from 'Data Compression' +to 'Archiving' to match that of GNU tar. + +'end-of-archive' (EOA) is now used consistently to refer to the blocks of +binary zeros used to mark the end of the archive. + +Operations are now listed before options in the --help output and in the +manual. + +Many small improvements have been made to the code and documentation. @@ -0,0 +1,97 @@ +Description + +Tarlz is a massively parallel (multi-threaded) combined implementation of +the tar archiver and the lzip compressor. Tarlz uses the compression library +lzlib. + +Tarlz creates tar archives using a simplified and safer variant of the POSIX +pax format compressed in lzip format, keeping the alignment between tar +members and lzip members. The resulting multimember tar.lz archive is fully +backward compatible with standard tar tools like GNU tar, which treat it +like any other tar.lz archive. Tarlz can append files to the end of such +compressed archives. + +Keeping the alignment between tar members and lzip members has two +advantages. 
It adds an indexed lzip layer on top of the tar archive, making +it possible to decode the archive safely in parallel. It also minimizes the +amount of data lost in case of corruption. Compressing a tar archive with +plzip may even double the amount of files lost for each lzip member damaged +because it does not keep the members aligned. + +Tarlz can create tar archives with five levels of compression granularity: +per file (--no-solid), per block (--bsolid, default), per directory +(--dsolid), appendable solid (--asolid), and solid (--solid). It can also +create uncompressed tar archives. + +Of course, compressing each file (or each directory) individually can't +achieve a compression ratio as high as compressing solidly the whole tar +archive, but it has the following advantages: + + * The resulting multimember tar.lz archive can be decompressed in + parallel, multiplying the decompression speed. + + * New members can be appended to the archive (by removing the + end-of-archive member), and unwanted members can be deleted from the + archive. Just like an uncompressed tar archive. + + * It is a safe POSIX-style backup format. In case of corruption, tarlz + can extract all the undamaged members from the tar.lz archive, + skipping over the damaged members, just like the standard + (uncompressed) tar. Moreover, the option '--keep-damaged' can be used + to recover as much data as possible from each damaged member, and + lziprecover can be used to recover some of the damaged members. + + * A multimember tar.lz archive is usually smaller than the corresponding + solidly compressed tar.gz archive, except when individually + compressing files smaller than about 32 KiB. + +Note that the POSIX pax format has a serious flaw. The metadata stored in +pax extended records are not protected by any kind of check sequence. +Corruption in a long file name may cause the extraction of the file in the +wrong place without warning. 
Corruption in a large file size may cause the +truncation of the file or the appending of garbage to the file, both +followed by a spurious warning about a corrupt header far from the place of +the undetected corruption. + +Metadata like file name and file size must be always protected in an archive +format because of the adverse effects of undetected corruption in them, +potentially much worse than undetected corruption in the data. Even more so +in the case of pax because the amount of metadata it stores is potentially +large, making undetected corruption and archiver misbehavior more probable. + +Headers and metadata must be protected separately from data because the +integrity checking of lzip may not be able to detect the corruption before +the metadata has been used, for example, to create a new file in the wrong +place. + +Because of the above, tarlz protects the extended records with a Cyclic +Redundancy Check (CRC) in a way compatible with standard tar tools. + +Tarlz does not understand other tar formats like gnu, oldgnu, star or v7. +The command 'tarlz -tf archive.tar.lz > /dev/null' can be used to verify +that the format of the archive is compatible with tarlz. + +The diagram below shows the correspondence between each tar member (formed +by one or two headers plus optional data) in the tar archive and each lzip +member in the resulting multimember tar.lz archive, when per file +compression is used: + +tar ++========+======+=================+===============+========+======+========+ +| header | data | extended header | extended data | header | data | EOA | ++========+======+=================+===============+========+======+========+ + +tar.lz ++===============+=================================================+========+ +| member | member | member | ++===============+=================================================+========+ + + +Copyright (C) 2013-2022 Antonio Diaz Diaz. 
+ +This file is free documentation: you have unlimited permission to copy, +distribute, and modify it. + +The file Makefile.in is a data file used by configure to produce the +Makefile. It has the same copyright owner and permissions that configure +itself. diff --git a/archive_reader.cc b/archive_reader.cc new file mode 100644 index 0000000..8ad315d --- /dev/null +++ b/archive_reader.cc @@ -0,0 +1,273 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cerrno> +#include <stdint.h> // for lzlib.h +#include <unistd.h> +#include <lzlib.h> + +#include "tarlz.h" +#include "lzip_index.h" +#include "archive_reader.h" + + +namespace { + +const char * const rdaerr_msg = "Error reading archive"; + +/* Return the number of bytes really read. + If (value returned < size) and (errno == 0), means EOF was reached. 
+*/ +int preadblock( const int fd, uint8_t * const buf, const int size, + const long long pos ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = pread( fd, buf + sz, size - sz, pos + sz ); + if( n > 0 ) sz += n; + else if( n == 0 ) break; // EOF + else if( errno != EINTR ) break; + errno = 0; + } + return sz; + } + +int non_tty_infd( const std::string & archive_name, const char * const namep ) + { + int infd = archive_name.empty() ? STDIN_FILENO : open_instream( archive_name ); + if( infd >= 0 && isatty( infd ) ) // for example /dev/tty + { show_file_error( namep, archive_name.empty() ? + "I won't read archive data from a terminal (missing -f option?)" : + "I won't read archive data from a terminal." ); + close( infd ); infd = -1; } + return infd; + } + + +void xLZ_decompress_write( LZ_Decoder * const decoder, + const uint8_t * const buffer, const int size ) + { + if( LZ_decompress_write( decoder, buffer, size ) != size ) + internal_error( "library error (LZ_decompress_write)." ); + } + +} // end namespace + + +Archive_descriptor::Archive_descriptor( const std::string & archive_name ) + : name( archive_name ), namep( name.empty() ? 
"(stdin)" : name.c_str() ), + infd( non_tty_infd( archive_name, namep ) ), + lzip_index( infd, true, false ), + seekable( lseek( infd, 0, SEEK_SET ) == 0 ), + indexed( seekable && lzip_index.retval() == 0 ) {} + + +int Archive_reader_base::parse_records( Extended & extended, + const Tar_header header, + Resizable_buffer & rbuf, + const char * const default_msg, + const bool permissive ) + { + const long long edsize = parse_octal( header + size_o, size_l ); + const long long bufsize = round_up( edsize ); + if( edsize <= 0 ) return err( 2, misrec_msg ); // no extended records + if( edsize >= 1LL << 33 || bufsize >= INT_MAX ) + return err( -2, longrec_msg ); // records too long + if( !rbuf.resize( bufsize ) ) return err( -1, mem_msg ); + e_msg_ = ""; e_code_ = 0; + int retval = read( rbuf.u8(), bufsize ); // extended records buffer + if( retval == 0 && !extended.parse( rbuf(), edsize, permissive ) ) + retval = 2; + if( retval && !*e_msg_ ) e_msg_ = default_msg; + return retval; + } + + +/* Read 'size' uncompressed bytes, decompressing the input if needed. + Return value: 0 = OK, 1 = OOM or read error, 2 = EOF or invalid data. */ +int Archive_reader::read( uint8_t * const buf, const int size ) + { + if( first_read ) // check format + { + first_read = false; + uncompressed_seekable = ad.seekable && !ad.indexed && + ad.lzip_index.file_size() > 3 * header_size; + if( size != header_size ) + internal_error( "size != header_size on first call." 
); + const int rd = readblock( ad.infd, buf, size ); + if( rd != size && errno ) return err( -1, rdaerr_msg, errno, rd ); + const Lzip_header & header = (*(const Lzip_header *)buf); + const bool islz = ( rd >= min_member_size && header.verify_magic() && + header.verify_version() && + isvalid_ds( header.dictionary_size() ) ); + const bool istar = ( rd == size && verify_ustar_chksum( buf ) ); + const bool iseoa = + ( !islz && !istar && rd == size && block_is_zero( buf, size ) ); + bool maybe_lz = islz; // maybe corrupt tar.lz + if( !islz && !istar && !iseoa ) // corrupt or invalid format + { + const bool lz_ext = has_lz_ext( ad.name ); + show_file_error( ad.namep, lz_ext ? posix_lz_msg : posix_msg ); + if( lz_ext && rd >= min_member_size ) maybe_lz = true; + else return err( 2 ); + } + if( !maybe_lz ) // uncompressed + { if( rd == size ) return 0; + return err( -2, "EOF reading archive.", 0, rd ); } + uncompressed_seekable = false; // compressed + decoder = LZ_decompress_open(); + if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) + { LZ_decompress_close( decoder ); decoder = 0; return err( -1, mem_msg ); } + xLZ_decompress_write( decoder, buf, rd ); + const int ret = read( buf, size ); if( ret != 0 ) return ret; + if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0; + return err( 2, islz ? posix_lz_msg : "" ); + } + + if( !decoder ) // uncompressed + { + const int rd = readblock( ad.infd, buf, size ); + if( rd == size ) return 0; else return err( -2, end_msg, 0, rd ); + } + const int ibuf_size = 16384; + uint8_t ibuf[ibuf_size]; + int sz = 0; + while( sz < size ) + { + const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); + if( rd < 0 ) + { + const unsigned long long old_pos = LZ_decompress_total_in_size( decoder ); + if( LZ_decompress_sync_to_member( decoder ) < 0 ) + internal_error( "library error (LZ_decompress_sync_to_member)." 
); + e_skip_ = true; set_error_status( 2 ); + const unsigned long long new_pos = LZ_decompress_total_in_size( decoder ); + // lzlib < 1.8 does not update total_in_size when syncing to member + if( new_pos >= old_pos && new_pos < LLONG_MAX ) + return err( 2, "", 0, sz, true ); + return err( -1, "Skipping to next header failed. " + "Lzlib 1.8 or newer required.", 0, sz ); + } + if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) + { return err( -2, end_msg, 0, sz ); } + sz += rd; + if( sz < size && !at_eof && LZ_decompress_write_size( decoder ) > 0 ) + { + const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) ); + const int rd = readblock( ad.infd, ibuf, rsize ); + xLZ_decompress_write( decoder, ibuf, rd ); + if( rd < rsize ) + { + at_eof = true; LZ_decompress_finish( decoder ); + if( errno ) return err( -1, rdaerr_msg, errno, sz ); + } + } + } + return 0; + } + + +int Archive_reader::skip_member( const Extended & extended ) + { + if( extended.file_size() <= 0 ) return 0; + long long rest = round_up( extended.file_size() ); // size + padding + if( uncompressed_seekable && lseek( ad.infd, rest, SEEK_CUR ) > 0 ) return 0; + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + while( rest > 0 ) // skip tar member + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest; + const int ret = read( buf, rsize ); + if( ret != 0 ) return ret; + rest -= rsize; + } + return 0; + } + + +void Archive_reader_i::set_member( const long i ) + { + LZ_decompress_reset( decoder ); // prepare for new member + data_pos_ = ad.lzip_index.dblock( i ).pos(); + mdata_end_ = ad.lzip_index.dblock( i ).end(); + archive_pos = ad.lzip_index.mblock( i ).pos(); + member_id = i; + } + + +/* Read 'size' decompressed bytes from the archive. + Return value: 0 = OK, 1 = OOM or read error, 2 = EOF or invalid data. 
*/ +int Archive_reader_i::read( uint8_t * const buf, const int size ) + { + int sz = 0; + + while( sz < size ) + { + const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); + if( rd < 0 ) + return err( 2, LZ_strerror( LZ_decompress_errno( decoder ) ), 0, sz ); + if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) + return err( -2, end_msg, 0, sz ); + sz += rd; data_pos_ += rd; + if( sz < size && LZ_decompress_write_size( decoder ) > 0 ) + { + const long long ibuf_size = 16384; + uint8_t ibuf[ibuf_size]; + const long long member_end = ad.lzip_index.mblock( member_id ).end(); + const long long rest = ( ( archive_pos < member_end ) ? + member_end : ad.lzip_index.cdata_size() ) - archive_pos; + const int rsize = std::min( LZ_decompress_write_size( decoder ), + (int)std::min( ibuf_size, rest ) ); + if( rsize <= 0 ) LZ_decompress_finish( decoder ); + else + { + const int rd = preadblock( ad.infd, ibuf, rsize, archive_pos ); + xLZ_decompress_write( decoder, ibuf, rd ); + archive_pos += rd; + if( rd < rsize ) + { + LZ_decompress_finish( decoder ); + if( errno ) return err( -1, rdaerr_msg, errno, sz ); + } + } + } + } + return 0; + } + + +int Archive_reader_i::skip_member( const Extended & extended ) + { + if( extended.file_size() <= 0 ) return 0; + long long rest = round_up( extended.file_size() ); // size + padding + if( data_pos_ + rest == mdata_end_ ) { data_pos_ = mdata_end_; return 0; } + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + while( rest > 0 ) // skip tar member + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest; + const int ret = read( buf, rsize ); + if( ret != 0 ) return ret; + rest -= rsize; + } + return 0; + } diff --git a/archive_reader.h b/archive_reader.h new file mode 100644 index 0000000..47fa844 --- /dev/null +++ b/archive_reader.h @@ -0,0 +1,120 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +struct Archive_descriptor + { + const std::string name; + const char * const namep; // printable archive name + const int infd; + const Lzip_index lzip_index; + const bool seekable; + const bool indexed; // archive is a compressed regular file + + Archive_descriptor( const std::string & archive_name ); + }; + + +class Archive_reader_base // base of serial and indexed readers + { +public: + const Archive_descriptor & ad; +protected: + LZ_Decoder * decoder; // destructor closes it if needed + const char * e_msg_; // message for show_file_error + int e_code_; // copy of errno + int e_size_; // partial size read in case of read error + bool e_skip_; // corrupt header skipped + bool fatal_; + + int err( const int retval, const char * const msg = "", const int code = 0, + const int size = 0, const bool skip = false ) + { e_msg_ = msg; e_code_ = code; e_size_ = size; e_skip_ = skip; + if( retval >= 0 ) return retval; + fatal_ = true; if( !*e_msg_ ) e_msg_ = "Fatal error"; return -retval; } + + Archive_reader_base( const Archive_descriptor & d ) + : ad( d ), decoder( 0 ), e_msg_( "" ), e_code_( 0 ), e_size_( 0 ), + e_skip_( false ), fatal_( false ) {} + +public: + virtual ~Archive_reader_base() + { if( decoder != 0 ) LZ_decompress_close( decoder ); } + + const char * e_msg() const { return e_msg_; } + int e_code() const { return e_code_; } + 
int e_size() const { return e_size_; } + bool e_skip() const { return e_skip_; } + bool fatal() const { return fatal_; } + + /* Read 'size' uncompressed bytes, decompressing the input if needed. + Return value: 0 = OK, 1 = OOM or read error, 2 = EOF or invalid data. + If !OK, fills all the e_* variables. */ + virtual int read( uint8_t * const buf, const int size ) = 0; + + int parse_records( Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const char * const default_msg, + const bool permissive ); + }; + + +class Archive_reader : public Archive_reader_base // serial reader + { + bool first_read; + bool uncompressed_seekable; // value set by first read call + bool at_eof; + +public: + Archive_reader( const Archive_descriptor & d ) + : Archive_reader_base( d ), first_read( true ), + uncompressed_seekable( false ), at_eof( false ) {} + + int read( uint8_t * const buf, const int size ); + int skip_member( const Extended & extended ); + }; + + +/* If the archive is compressed seekable (indexed), several indexed readers + can be constructed sharing the same Archive_descriptor, for example to + decode the archive in parallel. 
+*/
+class Archive_reader_i : public Archive_reader_base	// indexed reader
+  {
+  long long data_pos_;		// current decompressed position in archive
+  long long mdata_end_;		// current member decompressed end
+  long long archive_pos;	// current position in archive for pread
+  long member_id;		// current member unless reading beyond
+
+public:
+  /* Open a new lzlib decoder for the archive described by 'd', with all
+     positions and the member index set to 0.
+     If the decoder can't be opened or reports an error, it is closed,
+     'decoder' is set to 0, and the reader is marked as fatal. */
+  Archive_reader_i( const Archive_descriptor & d )
+    : Archive_reader_base( d ),
+      data_pos_( 0 ), mdata_end_( 0 ), archive_pos( 0 ), member_id( 0 )
+    {
+    decoder = LZ_decompress_open();
+    if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
+      { LZ_decompress_close( decoder ); decoder = 0; fatal_ = true; }
+    }
+
+  // Read-only accessors for the decompressed positions kept by the reader.
+  long long data_pos() const { return data_pos_; }
+  long long mdata_end() const { return mdata_end_; }
+  // True if the decompressed position is at the end of the current member.
+  bool at_member_end() const { return data_pos_ == mdata_end_; }
+
+  // Resets decoder and sets position to the start of the member.
+  void set_member( const long i );
+
+  // Same return values as Archive_reader_base::read (0 = OK).
+  int read( uint8_t * const buf, const int size );
+  // Skip the data of the tar member described by 'extended' (0 = OK).
+  int skip_member( const Extended & extended );
+  };
diff --git a/arg_parser.cc b/arg_parser.cc
new file mode 100644
index 0000000..59998ac
--- /dev/null
+++ b/arg_parser.cc
@@ -0,0 +1,197 @@
+/* Arg_parser - POSIX/GNU command line argument parser. (C++ version)
+   Copyright (C) 2006-2022 Antonio Diaz Diaz.
+
+   This library is free software. Redistribution and use in source and
+   binary forms, with or without modification, are permitted provided
+   that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions, and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions, and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#include <cstring> +#include <string> +#include <vector> + +#include "arg_parser.h" + + +bool Arg_parser::parse_long_option( const char * const opt, const char * const arg, + const Option options[], int & argind ) + { + unsigned len; + int index = -1; + bool exact = false, ambig = false; + + for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; + + // Test all long options for either exact match or abbreviated matches. + for( int i = 0; options[i].code != 0; ++i ) + if( options[i].long_name && + std::strncmp( options[i].long_name, &opt[2], len ) == 0 ) + { + if( std::strlen( options[i].long_name ) == len ) // Exact match found + { index = i; exact = true; break; } + else if( index < 0 ) index = i; // First nonexact match found + else if( options[index].code != options[i].code || + options[index].has_arg != options[i].has_arg ) + ambig = true; // Second or later nonexact match found + } + + if( ambig && !exact ) + { + error_ = "option '"; error_ += opt; error_ += "' is ambiguous"; + return false; + } + + if( index < 0 ) // nothing found + { + error_ = "unrecognized option '"; error_ += opt; error_ += '\''; + return false; + } + + ++argind; + data.push_back( Record( options[index].code, options[index].long_name ) ); + + if( opt[len+2] ) // '--<long_option>=<argument>' syntax + { + if( options[index].has_arg == no ) + { + error_ = "option '--"; error_ += options[index].long_name; + error_ += "' doesn't allow an argument"; + return false; + } + if( options[index].has_arg == yes && !opt[len+3] ) + { + error_ = "option '--"; error_ += options[index].long_name; + error_ += "' requires an argument"; + return false; + } + data.back().argument = &opt[len+3]; + return true; + } + + if( options[index].has_arg == yes ) + { + if( !arg || !arg[0] ) + { + error_ = "option '--"; error_ 
+= options[index].long_name; + error_ += "' requires an argument"; + return false; + } + ++argind; data.back().argument = arg; + return true; + } + + return true; + } + + +bool Arg_parser::parse_short_option( const char * const opt, const char * const arg, + const Option options[], int & argind ) + { + int cind = 1; // character index in opt + + while( cind > 0 ) + { + int index = -1; + const unsigned char c = opt[cind]; + + if( c != 0 ) + for( int i = 0; options[i].code; ++i ) + if( c == options[i].code ) + { index = i; break; } + + if( index < 0 ) + { + error_ = "invalid option -- '"; error_ += c; error_ += '\''; + return false; + } + + data.push_back( Record( c ) ); + if( opt[++cind] == 0 ) { ++argind; cind = 0; } // opt finished + + if( options[index].has_arg != no && cind > 0 && opt[cind] ) + { + data.back().argument = &opt[cind]; ++argind; cind = 0; + } + else if( options[index].has_arg == yes ) + { + if( !arg || !arg[0] ) + { + error_ = "option requires an argument -- '"; error_ += c; + error_ += '\''; + return false; + } + data.back().argument = arg; ++argind; cind = 0; + } + } + return true; + } + + +Arg_parser::Arg_parser( const int argc, const char * const argv[], + const Option options[], const bool in_order ) + { + if( argc < 2 || !argv || !options ) return; + + std::vector< const char * > non_options; // skipped non-options + int argind = 1; // index in argv + + while( argind < argc ) + { + const unsigned char ch1 = argv[argind][0]; + const unsigned char ch2 = ch1 ? argv[argind][1] : 0; + + if( ch1 == '-' && ch2 ) // we found an option + { + const char * const opt = argv[argind]; + const char * const arg = ( argind + 1 < argc ) ? 
argv[argind+1] : 0; + if( ch2 == '-' ) + { + if( !argv[argind][2] ) { ++argind; break; } // we found "--" + else if( !parse_long_option( opt, arg, options, argind ) ) break; + } + else if( !parse_short_option( opt, arg, options, argind ) ) break; + } + else + { + if( in_order ) data.push_back( Record( argv[argind++] ) ); + else non_options.push_back( argv[argind++] ); + } + } + if( !error_.empty() ) data.clear(); + else + { + for( unsigned i = 0; i < non_options.size(); ++i ) + data.push_back( Record( non_options[i] ) ); + while( argind < argc ) + data.push_back( Record( argv[argind++] ) ); + } + } + + +Arg_parser::Arg_parser( const char * const opt, const char * const arg, + const Option options[] ) + { + if( !opt || !opt[0] || !options ) return; + + if( opt[0] == '-' && opt[1] ) // we found an option + { + int argind = 1; // dummy + if( opt[1] == '-' ) + { if( opt[2] ) parse_long_option( opt, arg, options, argind ); } + else + parse_short_option( opt, arg, options, argind ); + if( !error_.empty() ) data.clear(); + } + else data.push_back( Record( opt ) ); + } diff --git a/arg_parser.h b/arg_parser.h new file mode 100644 index 0000000..e854838 --- /dev/null +++ b/arg_parser.h @@ -0,0 +1,110 @@ +/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) + Copyright (C) 2006-2022 Antonio Diaz Diaz. + + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +/* Arg_parser reads the arguments in 'argv' and creates a number of + option codes, option arguments, and non-option arguments. + + In case of error, 'error' returns a non-empty error message. + + 'options' is an array of 'struct Option' terminated by an element + containing a code which is zero. A null long_name means a short-only + option. A code value outside the unsigned char range means a long-only + option. + + Arg_parser normally makes it appear as if all the option arguments + were specified before all the non-option arguments for the purposes + of parsing, even if the user of your program intermixed option and + non-option arguments. If you want the arguments in the exact order + the user typed them, call 'Arg_parser' with 'in_order' = true. + + The argument '--' terminates all options; any following arguments are + treated as non-option arguments, even if they begin with a hyphen. + + The syntax for optional option arguments is '-<short_option><argument>' + (without whitespace), or '--<long_option>=<argument>'. 
+*/ + +class Arg_parser + { +public: + enum Has_arg { no, yes, maybe }; + + struct Option + { + int code; // Short option letter or code ( code != 0 ) + const char * long_name; // Long option name (maybe null) + Has_arg has_arg; + }; + +private: + struct Record + { + int code; + std::string parsed_name; + std::string argument; + explicit Record( const unsigned char c ) + : code( c ), parsed_name( "-" ) { parsed_name += c; } + Record( const int c, const char * const long_name ) + : code( c ), parsed_name( "--" ) { parsed_name += long_name; } + explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {} + }; + + const std::string empty_arg; + std::string error_; + std::vector< Record > data; + + bool parse_long_option( const char * const opt, const char * const arg, + const Option options[], int & argind ); + bool parse_short_option( const char * const opt, const char * const arg, + const Option options[], int & argind ); + +public: + Arg_parser( const int argc, const char * const argv[], + const Option options[], const bool in_order = false ); + + // Restricted constructor. Parses a single token and argument (if any). + Arg_parser( const char * const opt, const char * const arg, + const Option options[] ); + + const std::string & error() const { return error_; } + + // The number of arguments parsed. May be different from argc. + int arguments() const { return data.size(); } + + /* If code( i ) is 0, argument( i ) is a non-option. + Else argument( i ) is the option's argument (or empty). */ + int code( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].code; + else return 0; + } + + // Full name of the option parsed (short or long). 
+ const std::string & parsed_name( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].parsed_name; + else return empty_arg; + } + + const std::string & argument( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].argument; + else return empty_arg; + } + }; diff --git a/common.cc b/common.cc new file mode 100644 index 0000000..444280d --- /dev/null +++ b/common.cc @@ -0,0 +1,151 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cctype> +#include <cerrno> +#include <cstdlib> +#include <pthread.h> +#include <unistd.h> + +#include "tarlz.h" +#include "arg_parser.h" + + +void xinit_mutex( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_init( mutex, 0 ); + if( errcode ) + { show_error( "pthread_mutex_init", errcode ); exit_fail_mt(); } + } + +void xinit_cond( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_init( cond, 0 ); + if( errcode ) + { show_error( "pthread_cond_init", errcode ); exit_fail_mt(); } + } + + +void xdestroy_mutex( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_destroy( mutex ); + if( errcode ) + { show_error( "pthread_mutex_destroy", errcode ); exit_fail_mt(); } + } + +void xdestroy_cond( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_destroy( cond ); + if( errcode ) + { show_error( "pthread_cond_destroy", errcode ); exit_fail_mt(); } + } + + +void xlock( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_lock( mutex ); + if( errcode ) + { show_error( "pthread_mutex_lock", errcode ); exit_fail_mt(); } + } + + +void xunlock( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_unlock( mutex ); + if( errcode ) + { show_error( "pthread_mutex_unlock", errcode ); exit_fail_mt(); } + } + + +void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex ) + { + const int errcode = pthread_cond_wait( cond, mutex ); + if( errcode ) + { show_error( "pthread_cond_wait", errcode ); exit_fail_mt(); } + } + + +void xsignal( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_signal( cond ); + if( errcode ) + { show_error( "pthread_cond_signal", errcode ); exit_fail_mt(); } + } + + +void xbroadcast( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_broadcast( cond ); + if( errcode ) + { show_error( "pthread_cond_broadcast", errcode ); exit_fail_mt(); } + } + + 
+unsigned long long parse_octal( const uint8_t * const ptr, const int size ) + { + unsigned long long result = 0; + int i = 0; + while( i < size && std::isspace( ptr[i] ) ) ++i; + for( ; i < size && ptr[i] >= '0' && ptr[i] <= '7'; ++i ) + { result <<= 3; result += ptr[i] - '0'; } + return result; + } + + +/* Return the number of bytes really read. + If (value returned < size) and (errno == 0), means EOF was reached. +*/ +int readblock( const int fd, uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = read( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n == 0 ) break; // EOF + else if( errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +/* Return the number of bytes really written. + If (value returned < size), it is always an error. +*/ +int writeblock( const int fd, const uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = write( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n < 0 && errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +bool nonempty_arg( const Arg_parser & parser, const int i ) + { + return ( parser.code( i ) == 0 && !parser.argument( i ).empty() ); + } diff --git a/common_decode.cc b/common_decode.cc new file mode 100644 index 0000000..835687f --- /dev/null +++ b/common_decode.cc @@ -0,0 +1,243 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <ctime> +#include <sys/stat.h> + +#include "tarlz.h" +#include "arg_parser.h" + + +namespace { + +enum { mode_string_size = 10, + group_string_size = 1 + uname_l + 1 + gname_l + 1 }; // 67 + +void format_mode_string( const Tar_header header, char buf[mode_string_size] ) + { + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + + std::memcpy( buf, "----------", mode_string_size ); + switch( typeflag ) + { + case tf_regular: break; + case tf_link: buf[0] = 'h'; break; + case tf_symlink: buf[0] = 'l'; break; + case tf_chardev: buf[0] = 'c'; break; + case tf_blockdev: buf[0] = 'b'; break; + case tf_directory: buf[0] = 'd'; break; + case tf_fifo: buf[0] = 'p'; break; + case tf_hiperf: buf[0] = 'C'; break; + default: buf[0] = '?'; + } + const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits + const bool setuid = mode & S_ISUID; + const bool setgid = mode & S_ISGID; + const bool sticky = mode & S_ISVTX; + if( mode & S_IRUSR ) buf[1] = 'r'; + if( mode & S_IWUSR ) buf[2] = 'w'; + if( mode & S_IXUSR ) buf[3] = setuid ? 's' : 'x'; + else if( setuid ) buf[3] = 'S'; + if( mode & S_IRGRP ) buf[4] = 'r'; + if( mode & S_IWGRP ) buf[5] = 'w'; + if( mode & S_IXGRP ) buf[6] = setgid ? 's' : 'x'; + else if( setgid ) buf[6] = 'S'; + if( mode & S_IROTH ) buf[7] = 'r'; + if( mode & S_IWOTH ) buf[8] = 'w'; + if( mode & S_IXOTH ) buf[9] = sticky ? 
't' : 'x'; + else if( sticky ) buf[9] = 'T'; + } + + +int format_user_group_string( const Extended & extended, + const Tar_header header, + char buf[group_string_size] ) + { + int len; + if( header[uname_o] && header[gname_o] ) + len = snprintf( buf, group_string_size, + " %.32s/%.32s", header + uname_o, header + gname_o ); + else + len = snprintf( buf, group_string_size, " %llu/%llu", + extended.get_uid(), extended.get_gid() ); + return len; + } + + +// return true if dir is a parent directory of name +bool compare_prefix_dir( const char * const dir, const char * const name ) + { + int len = 0; + while( dir[len] && dir[len] == name[len] ) ++len; + return ( !dir[len] && len > 0 && ( dir[len-1] == '/' || name[len] == '/' ) ); + } + + +// compare two file names ignoring trailing slashes +bool compare_tslash( const char * const name1, const char * const name2 ) + { + const char * p = name1; + const char * q = name2; + while( *p && *p == *q ) { ++p; ++q; } + while( *p == '/' ) ++p; + while( *q == '/' ) ++q; + return ( !*p && !*q ); + } + +} // end namespace + + +bool block_is_zero( const uint8_t * const buf, const int size ) + { + for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false; + return true; + } + + +bool format_member_name( const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const bool long_format ) + { + if( long_format ) + { + format_mode_string( header, rbuf() ); + const int group_string_len = + format_user_group_string( extended, header, rbuf() + mode_string_size ); + int offset = mode_string_size + group_string_len; + const time_t mtime = extended.mtime().sec(); + struct tm t; + if( !localtime_r( &mtime, &t ) ) // if local time fails + { time_t z = 0; if( !gmtime_r( &z, &t ) ) // use the UTC epoch + { t.tm_year = 70; t.tm_mon = t.tm_hour = t.tm_min = 0; t.tm_mday = 1; } } + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); + const char * 
const link_string = !islink ? "" : + ( ( typeflag == tf_link ) ? " link to " : " -> " ); + // print "user/group size" in a field of width 19 with 8 or more for size + if( typeflag == tf_chardev || typeflag == tf_blockdev ) + { + const unsigned devmajor = parse_octal( header + devmajor_o, devmajor_l ); + const unsigned devminor = parse_octal( header + devminor_o, devminor_l ); + const int width = std::max( 1, + std::max( 8, 19 - group_string_len ) - 1 - decimal_digits( devminor ) ); + offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %*u,%u", + width, devmajor, devminor ); + } + else + { + const int width = std::max( 8, 19 - group_string_len ); + offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %*llu", + width, extended.file_size() ); + } + for( int i = 0; i < 2; ++i ) // resize rbuf if not large enough + { + const int len = snprintf( rbuf() + offset, rbuf.size() - offset, + " %4d-%02u-%02u %02u:%02u %s%s%s\n", + 1900 + t.tm_year, 1 + t.tm_mon, t.tm_mday, t.tm_hour, + t.tm_min, extended.path().c_str(), link_string, + islink ? 
extended.linkpath().c_str() : "" ); + if( len + offset < (int)rbuf.size() ) break; + if( !rbuf.resize( len + offset + 1 ) ) return false; + } + } + else + { + if( rbuf.size() < extended.path().size() + 2 && + !rbuf.resize( extended.path().size() + 2 ) ) return false; + snprintf( rbuf(), rbuf.size(), "%s\n", extended.path().c_str() ); + } + return true; + } + + +bool show_member_name( const Extended & extended, const Tar_header header, + const int vlevel, Resizable_buffer & rbuf ) + { + if( verbosity >= vlevel ) + { + if( !format_member_name( extended, header, rbuf, verbosity > vlevel ) ) + { show_error( mem_msg ); return false; } + std::fputs( rbuf(), stdout ); + std::fflush( stdout ); + } + return true; + } + + +bool check_skip_filename( const Cl_options & cl_opts, + std::vector< char > & name_pending, + const char * const filename ) + { + if( Exclude::excluded( filename ) ) return true; // skip excluded files + bool skip = cl_opts.num_files > 0; // if no files specified, skip nothing + if( skip ) // else skip all but the files (or trees) specified + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) ) + { + std::string removed_prefix; + const char * const name = remove_leading_dotslash( + cl_opts.parser.argument( i ).c_str(), &removed_prefix ); + if( compare_prefix_dir( name, filename ) || + compare_tslash( name, filename ) ) + { print_removed_prefix( removed_prefix ); + skip = false; name_pending[i] = false; break; } + } + return skip; + } + + +mode_t get_umask() + { + static mode_t mask = 0; // read once, cache the result + static bool first_call = true; + if( first_call ) { first_call = false; mask = umask( 0 ); umask( mask ); + mask &= S_IRWXU | S_IRWXG | S_IRWXO; } + return mask; + } + + +bool make_path( const std::string & name ) + { + const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; + unsigned end = name.size(); // first slash before last component + + while( end > 0 && name[end-1] == '/' ) 
--end; // remove trailing slashes + while( end > 0 && name[end-1] != '/' ) --end; // remove last component + while( end > 0 && name[end-1] == '/' ) --end; // remove more slashes + + unsigned index = 0; + while( index < end ) + { + while( index < end && name[index] == '/' ) ++index; + unsigned first = index; + while( index < end && name[index] != '/' ) ++index; + if( first < index ) + { + const std::string partial( name, 0, index ); + struct stat st; + if( lstat( partial.c_str(), &st ) == 0 ) + { if( !S_ISDIR( st.st_mode ) ) { errno = ENOTDIR; return false; } } + else if( mkdir( partial.c_str(), mode ) != 0 && errno != EEXIST ) + return false; // if EEXIST, another thread or process created the dir + } + } + return true; + } diff --git a/compress.cc b/compress.cc new file mode 100644 index 0000000..4e74efa --- /dev/null +++ b/compress.cc @@ -0,0 +1,375 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cerrno> +#include <csignal> +#include <cstdio> +#include <cstdlib> +#include <stdint.h> // for lzlib.h +#include <unistd.h> +#include <utime.h> +#include <sys/stat.h> +#include <lzlib.h> + +#include "tarlz.h" +#include "arg_parser.h" + + +namespace { + +/* Variables used in signal handler context. 
+ They are not declared volatile because the handler never returns. */ +std::string output_filename; +int outfd = -1; +bool delete_output_on_interrupt = false; + + +void set_signals( void (*action)(int) ) + { + std::signal( SIGHUP, action ); + std::signal( SIGINT, action ); + std::signal( SIGTERM, action ); + } + + +void cleanup_and_fail( const int retval ) + { + set_signals( SIG_IGN ); // ignore signals + if( delete_output_on_interrupt ) + { + delete_output_on_interrupt = false; + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", + program_name, output_filename.c_str() ); + if( outfd >= 0 ) { close( outfd ); outfd = -1; } + if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT ) + show_error( "WARNING: deletion of output file (apparently) failed." ); + } + std::exit( retval ); + } + + +extern "C" void signal_handler( int ) + { + show_error( "Control-C or similar caught, quitting." ); + cleanup_and_fail( 1 ); + } + + +const char * ne_output_filename() // non-empty output file name + { + return output_filename.size() ? output_filename.c_str() : "(stdout)"; + } + + +bool check_tty_in( const char * const input_filename, const int infd ) + { + if( isatty( infd ) ) // for example /dev/tty + { show_file_error( input_filename, + "I won't read archive data from a terminal." ); + close( infd ); return false; } + return true; + } + +bool check_tty_out() + { + if( isatty( outfd ) ) // for example /dev/tty + { show_file_error( ne_output_filename(), + "I won't write compressed data to a terminal." ); + return false; } + return true; + } + + +// Set permissions, owner, and times. +void close_and_set_permissions( const struct stat * const in_statsp ) + { + bool warning = false; + if( in_statsp ) + { + const mode_t mode = in_statsp->st_mode; + // fchown will in many cases return with EPERM, which can be safely ignored. 
+ if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) + { if( fchmod( outfd, mode ) != 0 ) warning = true; } + else + if( errno != EPERM || + fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) + warning = true; + } + if( close( outfd ) != 0 ) + { + show_error( "Error closing output file", errno ); + cleanup_and_fail( 1 ); + } + outfd = -1; + delete_output_on_interrupt = false; + if( in_statsp ) + { + struct utimbuf t; + t.actime = in_statsp->st_atime; + t.modtime = in_statsp->st_mtime; + if( utime( output_filename.c_str(), &t ) != 0 ) warning = true; + } + if( warning && verbosity >= 1 ) + show_error( "Can't change output file attributes." ); + } + + +bool archive_write( const uint8_t * const buf, const long long size, + LZ_Encoder * const encoder ) + { + static bool flushed = true; // avoid flushing empty lzip members + + if( size <= 0 && flushed ) return true; + flushed = ( size <= 0 ); + enum { obuf_size = 65536 }; + uint8_t obuf[obuf_size]; + long long sz = 0; + if( flushed ) LZ_compress_finish( encoder ); // flush encoder + while( sz < size || flushed ) + { + if( sz < size ) + { const int wr = LZ_compress_write( encoder, buf + sz, + std::min( size - sz, (long long)max_dictionary_size ) ); + if( wr < 0 ) internal_error( "library error (LZ_compress_write)." ); + sz += wr; } + if( sz >= size && !flushed ) break; // minimize dictionary size + const int rd = LZ_compress_read( encoder, obuf, obuf_size ); + if( rd < 0 ) internal_error( "library error (LZ_compress_read)." ); + if( rd == 0 && sz >= size ) break; + if( writeblock( outfd, obuf, rd ) != rd ) + { show_file_error( ne_output_filename(), werr_msg, errno ); return false; } + } + if( LZ_compress_finished( encoder ) == 1 && + LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 ) + internal_error( "library error (LZ_compress_restart_member)." 
);
+  return true;
+  }
+
+
+/* Compress the EOA block already stored in 'header' and then the rest of
+   the input read from 'infd', block by block, until EOF.
+   Unless compressing in solid mode, the encoder is flushed before the EOA
+   blocks, and flushed again when the first non-zero block after them is
+   found.
+   Return false on read or write error. 'infd' is never closed here; the
+   caller owns it and must close it on every exit path.
+*/
+bool tail_compress( const Cl_options & cl_opts,
+                    const int infd, Tar_header header,
+                    LZ_Encoder * const encoder )
+  {
+  if( cl_opts.solidity != solid && !archive_write( 0, 0, encoder ) )
+    return false;		// flush encoder before compressing EOA blocks
+  int size = header_size;
+  bool zero = true;	// true until non-zero data found after EOA blocks
+  while( true )
+    {
+    if( size > 0 && !archive_write( header, size, encoder ) ) return false;
+    if( size < header_size ) break;			// EOF
+    size = readblock( infd, header, header_size );
+    if( errno )			// report the read error instead of hiding it
+      { show_error( "Read error", errno ); return false; }
+    if( zero && !block_is_zero( header, size ) )
+      { zero = false;	// flush encoder after compressing EOA blocks
+        if( cl_opts.solidity != solid && !archive_write( 0, 0, encoder ) )
+          return false; }
+    }
+  return true;
+  }
+
+
+/* Compress the tar archive 'input_filename' ("-" means standard input),
+   one tar member per iteration, through 'encoder'.
+   If neither 'to_stdout' nor 'to_file' is set, the output goes to standard
+   output when reading from standard input, else to 'input_filename' + ".lz".
+   If 'to_file' is set and no output is open yet, 'output_filename' is opened
+   after the first header has been read from the input.
+   Return value: 0 = OK, 1 = error opening, reading, or writing a file, out
+   of memory, or member data ending unexpectedly, 2 = invalid archive data.
+   'infd' is closed on every path once it has passed the initial checks.
+*/
+int compress_archive( const Cl_options & cl_opts,
+                      const std::string & input_filename,
+                      LZ_Encoder * const encoder,
+                      const bool to_stdout, const bool to_file )
+  {
+  const bool one_to_one = !to_stdout && !to_file;
+  const bool from_stdin = input_filename == "-";
+  const char * const filename = from_stdin ? "(stdin)" : input_filename.c_str();
+  const int infd = from_stdin ? STDIN_FILENO : open_instream( filename );
+  if( infd < 0 || !check_tty_in( filename, infd ) ) return 1;
+  if( one_to_one )
+    {
+    if( from_stdin ) { outfd = STDOUT_FILENO; output_filename.clear(); }
+    else
+      {
+      output_filename = input_filename + ".lz";
+      outfd = open_outstream( output_filename, true, 0, false );
+      if( outfd < 0 ) { close( infd ); return 1; }
+      delete_output_on_interrupt = true;
+      }
+    if( !check_tty_out() ) { close( infd ); return 1; }	// don't delete a tty
+    }
+  if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
+
+  unsigned long long partial_data_size = 0;	// size of current block
+  Extended extended;			// metadata from extended records
+  Resizable_buffer rbuf;		// headers and extended records buffer
+  if( !rbuf.size() ) { show_error( mem_msg ); close( infd ); return 1; }
+  const char * const rderr_msg = "Read error";
+
+  while( true )				// process one tar member per iteration
+    {
+    int total_header_size = header_size;	// size of header(s) read
+    const int rd = readblock( infd, rbuf.u8(), header_size );
+    if( rd == 0 && errno == 0 ) break;		// missing EOA blocks
+    if( rd != header_size )
+      { show_file_error( filename, rderr_msg, errno ); close( infd ); return 1; }
+
+    if( to_file && outfd < 0 )	// open outfd after verifying infd
+      {
+      outfd = open_outstream( output_filename, true, 0, false );
+      // check tty only once and don't try to delete a tty
+      if( outfd < 0 || !check_tty_out() ) { close( infd ); return 1; }
+      delete_output_on_interrupt = true;
+      }
+
+    if( !verify_ustar_chksum( rbuf.u8() ) )	// maybe EOA block
+      {
+      if( block_is_zero( rbuf.u8(), header_size ) )	// first EOA block
+        {
+        // a failure compressing the tail must not be reported as success
+        if( !tail_compress( cl_opts, infd, rbuf.u8(), encoder ) )
+          { close( infd ); return 1; }
+        break;
+        }
+      show_file_error( filename, bad_hdr_msg ); close( infd ); return 2;
+      }
+
+    const Typeflag typeflag = (Typeflag)rbuf()[typeflag_o];
+    if( typeflag == tf_extended || typeflag == tf_global )
+      {
+      const long long edsize = parse_octal( rbuf.u8() + size_o, size_l );
+      const long long bufsize = round_up( edsize );
+      // overflow or no extended data
+      if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX )
+        { show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; }
+      if( !rbuf.resize( total_header_size + bufsize ) )
+        { show_file_error( filename, mem_msg ); close( infd ); return 1; }
+      if( readblock( infd, rbuf.u8() + total_header_size, bufsize ) != bufsize )
+        { show_file_error( filename, rderr_msg, errno ); close( infd ); return 1; }
+      total_header_size += bufsize;
+      if( typeflag == tf_extended )	// do not parse global headers
+        {
+        if( !extended.parse( rbuf() + header_size, edsize, false ) )
+          { show_file_error( filename, extrec_msg ); close( infd ); return 2; }
+        // read ustar header
+        if( !rbuf.resize( total_header_size + header_size ) )
+          { show_file_error( filename, mem_msg ); close( infd ); return 1; }
+        if( readblock( infd, rbuf.u8() + total_header_size, header_size ) != header_size )
+          { const int errcode = errno;	// save it before other calls change it
+            show_file_error( filename, errcode ? rderr_msg : end_msg, errcode );
+            close( infd ); return errcode ? 1 : 2; }
+        // verify the ustar header just read, not the extended header at
+        // the start of rbuf (which was already verified above)
+        if( !verify_ustar_chksum( rbuf.u8() + total_header_size ) )
+          { show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; }
+        const Typeflag typeflag2 = (Typeflag)(rbuf() + total_header_size)[typeflag_o];
+        if( typeflag2 == tf_extended || typeflag2 == tf_global )
+          { const char * msg = ( typeflag2 == tf_global ) ? fv_msg2 : fv_msg3;
+            show_file_error( filename, msg ); close( infd ); return 2; }
+        total_header_size += header_size;
+        }
+      }
+
+    const long long file_size = round_up( extended.get_file_size_and_reset(
+                                rbuf.u8() + total_header_size - header_size ) );
+    if( cl_opts.solidity == bsolid &&
+        block_is_full( total_header_size - header_size, file_size,
+                       cl_opts.data_size, partial_data_size ) &&
+        !archive_write( 0, 0, encoder ) ) { close( infd ); return 1; }
+    if( !archive_write( rbuf.u8(), total_header_size, encoder ) )
+      { close( infd ); return 1; }
+
+    if( file_size )
+      {
+      const long long bufsize = 32 * header_size;
+      uint8_t buf[bufsize];
+      long long rest = file_size;	// file_size already rounded up
+      while( rest > 0 )
+        {
+        int size = std::min( rest, bufsize );
+        const int rd = readblock( infd, buf, size );
+        rest -= rd;
+        if( rd != size )
+          {
+          if( verbosity >= 0 )
+            std::fprintf( stderr, "'%s' ends unexpectedly at pos %llu\n",
+                          filename, file_size - rest );
+          close( infd ); return 1;
+          }
+        if( !archive_write( buf, size, encoder ) ) { close( infd ); return 1; }
+        }
+      }
+    if( cl_opts.solidity == no_solid && !archive_write( 0, 0, encoder ) )
+      { close( infd ); return 1; }	// one tar member per lzip member
+    }
+  // flush and restart encoder (for next archive)
+  if( !archive_write( 0, 0, encoder ) ) { close( infd ); return 1; }
+  const bool need_close = delete_output_on_interrupt &&
+                          ( one_to_one || ( to_file && !from_stdin ) );
+  struct stat in_stats;
+  const struct stat * const in_statsp =
+    ( need_close && fstat( infd, &in_stats ) == 0 ) ? &in_stats : 0;
+  if( close( infd ) != 0 )
+    { show_file_error( filename, eclosf_msg, errno ); return 1; }
+  if( need_close ) close_and_set_permissions( in_statsp );
+  return 0;
+  }
+
+} // end namespace
+
+
+int compress( const Cl_options & cl_opts )
+  {
+  if( cl_opts.num_files > 1 && cl_opts.output_filename.size() )
+    { show_file_error( cl_opts.output_filename.c_str(),
+                       "Only can compress one archive when using '-o'."
); return 1; } + const bool to_stdout = cl_opts.output_filename == "-"; + if( to_stdout ) // check tty only once + { outfd = STDOUT_FILENO; if( !check_tty_out() ) return 1; } + else outfd = -1; + const bool to_file = !to_stdout && cl_opts.output_filename.size(); + if( to_file ) output_filename = cl_opts.output_filename; + if( !to_stdout && ( cl_opts.filenames_given || to_file ) ) + set_signals( signal_handler ); + + LZ_Encoder * encoder = LZ_compress_open( + option_mapping[cl_opts.level].dictionary_size, + option_mapping[cl_opts.level].match_len_limit, LLONG_MAX ); + if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) + { + if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) + show_error( mem_msg2 ); + else + internal_error( "invalid argument to encoder." ); + return 1; + } + + if( !cl_opts.filenames_given ) + return compress_archive( cl_opts, "-", encoder, to_stdout, to_file ); + int retval = 0; + bool stdin_used = false; + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) ) // skip opts, empty names + { + if( cl_opts.parser.argument( i ) == "-" ) + { if( stdin_used ) continue; else stdin_used = true; } + const int tmp = compress_archive( cl_opts, cl_opts.parser.argument( i ), + encoder, to_stdout, to_file ); + if( tmp ) + { set_retval( retval, tmp ); + if( delete_output_on_interrupt ) cleanup_and_fail( retval ); } + } + // flush and close encoder if needed + if( outfd >= 0 && archive_write( 0, 0, encoder ) && + LZ_compress_close( encoder ) < 0 ) + { show_error( "LZ_compress_close failed." ); set_retval( retval, 1 ); } + if( outfd >= 0 && close( outfd ) != 0 ) // to_stdout + { + show_error( "Error closing stdout", errno ); + set_retval( retval, 1 ); + } + return retval; + } diff --git a/configure b/configure new file mode 100755 index 0000000..e02fcf0 --- /dev/null +++ b/configure @@ -0,0 +1,200 @@ +#! 
/bin/sh +# configure script for Tarlz - Archiver with multimember lzip compression +# Copyright (C) 2013-2022 Antonio Diaz Diaz. +# +# This configure script is free software: you have unlimited permission +# to copy, distribute, and modify it. + +pkgname=tarlz +pkgversion=0.23 +progname=tarlz +srctrigger=doc/${pkgname}.texi + +# clear some things potentially inherited from environment. +LC_ALL=C +export LC_ALL +srcdir= +prefix=/usr/local +exec_prefix='$(prefix)' +bindir='$(exec_prefix)/bin' +datarootdir='$(prefix)/share' +infodir='$(datarootdir)/info' +mandir='$(datarootdir)/man' +CXX=g++ +CPPFLAGS= +CXXFLAGS='-Wall -W -O2' +LDFLAGS= +LIBS='-llz -lpthread' + +# checking whether we are using GNU C++. +/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; } + +# Loop over all args +args= +no_create= +while [ $# != 0 ] ; do + + # Get the first arg, and shuffle + option=$1 ; arg2=no + shift + + # Add the argument quoted to args + if [ -z "${args}" ] ; then args="\"${option}\"" + else args="${args} \"${option}\"" ; fi + + # Split out the argument for options that take them + case ${option} in + *=*) optarg=`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'` ;; + esac + + # Process the options + case ${option} in + --help | -h) + echo "Usage: $0 [OPTION]... [VAR=VALUE]..." + echo + echo "To assign makefile variables (e.g., CXX, CXXFLAGS...), specify them as" + echo "arguments to configure in the form VAR=VALUE." + echo + echo "Options and variables: [defaults in brackets]" + echo " -h, --help display this help and exit" + echo " -V, --version output version information and exit" + echo " --srcdir=DIR find the sources in DIR [. 
or ..]" + echo " --prefix=DIR install into DIR [${prefix}]" + echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]" + echo " --bindir=DIR user executables directory [${bindir}]" + echo " --datarootdir=DIR base directory for doc and data [${datarootdir}]" + echo " --infodir=DIR info files directory [${infodir}]" + echo " --mandir=DIR man pages directory [${mandir}]" + echo " CXX=COMPILER C++ compiler to use [${CXX}]" + echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" + echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]" + echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS" + echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" + echo " LIBS=OPTIONS libraries to pass to the linker [${LIBS}]" + echo + exit 0 ;; + --version | -V) + echo "Configure script for ${pkgname} version ${pkgversion}" + exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + + --srcdir=*) srcdir=${optarg} ;; + --prefix=*) prefix=${optarg} ;; + --exec-prefix=*) exec_prefix=${optarg} ;; + --bindir=*) bindir=${optarg} ;; + --datarootdir=*) datarootdir=${optarg} ;; + --infodir=*) infodir=${optarg} ;; + --mandir=*) mandir=${optarg} ;; + --no-create) no_create=yes ;; + + CXX=*) CXX=${optarg} ;; + CPPFLAGS=*) CPPFLAGS=${optarg} ;; + CXXFLAGS=*) CXXFLAGS=${optarg} ;; + CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;; + LDFLAGS=*) LDFLAGS=${optarg} ;; + LIBS=*) LIBS="${optarg} ${LIBS}" ;; + + --*) + echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; + *=* | *-*-*) ;; + *) + echo "configure: unrecognized option: '${option}'" 1>&2 + echo "Try 'configure --help' for more information." 
1>&2 + exit 1 ;; + esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to '${option}'" 1>&2 + exit 1 + fi + fi +done + +# Find the source files, if location was not specified. +srcdirtext= +if [ -z "${srcdir}" ] ; then + srcdirtext="or . or .." ; srcdir=. + if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi + if [ ! -r "${srcdir}/${srctrigger}" ] ; then + ## the sed command below emulates the dirname command + srcdir=`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + fi +fi + +if [ ! -r "${srcdir}/${srctrigger}" ] ; then + echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: (At least ${srctrigger} is missing)." 1>&2 + exit 1 +fi + +# Set srcdir to . if that's what it is. +if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi + +echo +if [ -z "${no_create}" ] ; then + echo "creating config.status" + rm -f config.status + cat > config.status << EOF +#! /bin/sh +# This file was generated automatically by configure. Don't edit. +# Run this file to recreate the current configuration. +# +# This script is free software: you have unlimited permission +# to copy, distribute, and modify it. + +exec /bin/sh $0 ${args} --no-create +EOF + chmod +x config.status +fi + +echo "creating Makefile" +echo "VPATH = ${srcdir}" +echo "prefix = ${prefix}" +echo "exec_prefix = ${exec_prefix}" +echo "bindir = ${bindir}" +echo "datarootdir = ${datarootdir}" +echo "infodir = ${infodir}" +echo "mandir = ${mandir}" +echo "CXX = ${CXX}" +echo "CPPFLAGS = ${CPPFLAGS}" +echo "CXXFLAGS = ${CXXFLAGS}" +echo "LDFLAGS = ${LDFLAGS}" +echo "LIBS = ${LIBS}" +rm -f Makefile +cat > Makefile << EOF +# Makefile for Tarlz - Archiver with multimember lzip compression +# Copyright (C) 2013-2022 Antonio Diaz Diaz. +# This file was generated automatically by configure. Don't edit. 
+# +# This Makefile is free software: you have unlimited permission +# to copy, distribute, and modify it. + +pkgname = ${pkgname} +pkgversion = ${pkgversion} +progname = ${progname} +VPATH = ${srcdir} +prefix = ${prefix} +exec_prefix = ${exec_prefix} +bindir = ${bindir} +datarootdir = ${datarootdir} +infodir = ${infodir} +mandir = ${mandir} +CXX = ${CXX} +CPPFLAGS = ${CPPFLAGS} +CXXFLAGS = ${CXXFLAGS} +LDFLAGS = ${LDFLAGS} +LIBS = ${LIBS} +EOF +cat "${srcdir}/Makefile.in" >> Makefile + +echo "OK. Now you can run make." +echo "If make fails, verify that the compression library lzlib is correctly" +echo "installed (see INSTALL)." diff --git a/create.cc b/create.cc new file mode 100644 index 0000000..53ba5f5 --- /dev/null +++ b/create.cc @@ -0,0 +1,788 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <pthread.h> +#include <stdint.h> // for lzlib.h +#include <unistd.h> +#include <sys/stat.h> +#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ + !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ +#include <sys/sysmacros.h> // for major, minor +#endif +#include <ftw.h> +#include <grp.h> +#include <pwd.h> +#include <lzlib.h> + +#include "tarlz.h" +#include "arg_parser.h" +#include "create.h" + + +Archive_attrs archive_attrs; // archive attributes at time of creation + + +namespace { + +const Cl_options * gcl_opts = 0; // local vars needed by add_member +LZ_Encoder * encoder = 0; +const char * archive_namep = 0; +unsigned long long partial_data_size = 0; // size of current block +Resizable_buffer grbuf; // extended header + data +int goutfd = -1; +int error_status = 0; + + +/* Return true if some argument with option code 'C' follows the first argument that passes nonempty_arg and does not begin with '/'. Arguments before that first match are not inspected for 'C'. */ +bool option_C_after_relative_filename( const Arg_parser & parser ) + { + for( int i = 0; i < parser.arguments(); ++i ) + if( nonempty_arg( parser, i ) && parser.argument( i )[0] != '/' ) + while( ++i < parser.arguments() ) + if( parser.code( i ) == 'C' ) return true; + return false; + } + + +/* Check archive type. Return position of EOA blocks or -1 if failure. + If remove_eoa, leave fd file pos at beginning of the EOA blocks. + Else, leave fd file pos at 0. 
+*/ +long long check_compressed_appendable( const int fd, const bool remove_eoa ) + { + struct stat st; // fd must be regular + if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return -1; + if( lseek( fd, 0, SEEK_SET ) != 0 ) return -1; + enum { bufsize = header_size + ( header_size / 8 ) }; + uint8_t buf[bufsize]; + const int rd = readblock( fd, buf, bufsize ); + if( rd == 0 && errno == 0 ) return 0; // append to empty archive + if( rd < min_member_size || ( rd != bufsize && errno ) ) return -1; + const Lzip_header * const p = (const Lzip_header *)buf; // shut up gcc + if( !p->verify_magic() || !p->verify_version() ) return -1; + LZ_Decoder * decoder = LZ_decompress_open(); // decompress first header + if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok || + LZ_decompress_write( decoder, buf, rd ) != rd || + LZ_decompress_read( decoder, buf, header_size ) != header_size ) + { LZ_decompress_close( decoder ); return -1; } + LZ_decompress_close( decoder ); + const bool maybe_eoa = block_is_zero( buf, header_size ); + if( !verify_ustar_chksum( buf ) && !maybe_eoa ) return -1; + const long long end = lseek( fd, 0, SEEK_END ); + if( end < min_member_size ) return -1; + + Lzip_trailer trailer; // read last trailer + if( seek_read( fd, trailer.data, Lzip_trailer::size, + end - Lzip_trailer::size ) != Lzip_trailer::size ) return -1; + const long long member_size = trailer.member_size(); + if( member_size < min_member_size || member_size > end || + ( maybe_eoa && member_size != end ) ) return -1; // garbage after EOA? 
+ + Lzip_header header; // read last header + if( seek_read( fd, header.data, Lzip_header::size, + end - member_size ) != Lzip_header::size ) return -1; + if( !header.verify_magic() || !header.verify_version() || + !isvalid_ds( header.dictionary_size() ) ) return -1; + + // EOA marker in last member must contain between 512 and 32256 zeros alone + const unsigned long long data_size = trailer.data_size(); + if( data_size < header_size || data_size > 32256 ) return -1; + const unsigned data_crc = trailer.data_crc(); + const CRC32 crc32; + uint32_t crc = 0xFFFFFFFFU; + for( unsigned i = 0; i < data_size; ++i ) crc32.update_byte( crc, 0 ); + crc ^= 0xFFFFFFFFU; + if( crc != data_crc ) return -1; + + const long long pos = remove_eoa ? end - member_size : 0; + if( lseek( fd, pos, SEEK_SET ) != pos ) return -1; + return end - member_size; + } + + +/* Skip all tar headers. + Return position of EOA blocks, -1 if failure, -2 if out of memory. + If remove_eoa, leave fd file pos at beginning of the EOA blocks. + Else, leave fd file pos at 0. 
+*/ +long long check_uncompressed_appendable( const int fd, const bool remove_eoa ) + { + struct stat st; // fd must be regular + if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return -1; + if( lseek( fd, 0, SEEK_SET ) != 0 ) return -1; + if( st.st_size <= 0 ) return 0; // append to empty archive + long long eoa_pos = 0; // pos of EOA blocks + Extended extended; // metadata from extended records + Resizable_buffer rbuf; // extended records buffer + bool prev_extended = false; // prev header was extended + if( !rbuf.size() ) return -2; + + while( true ) // process one tar header per iteration + { + Tar_header header; + const int rd = readblock( fd, header, header_size ); + if( rd == 0 && errno == 0 ) break; // missing EOA blocks + if( rd != header_size ) return -1; + if( !verify_ustar_chksum( header ) ) // maybe EOA block + { if( block_is_zero( header, header_size ) ) break; else return -1; } + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_extended || typeflag == tf_global ) + { + if( prev_extended ) return -1; + const long long edsize = parse_octal( header + size_o, size_l ); + const long long bufsize = round_up( edsize ); + if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) + return -1; // overflow or no extended data + if( !rbuf.resize( bufsize ) ) return -2; + if( readblock( fd, rbuf.u8(), bufsize ) != bufsize ) + return -1; + if( typeflag == tf_extended ) + { if( !extended.parse( rbuf(), edsize, false ) ) return -1; + prev_extended = true; } + continue; + } + prev_extended = false; + + eoa_pos = lseek( fd, round_up( extended.get_file_size_and_reset( header ) ), + SEEK_CUR ); + if( eoa_pos <= 0 ) return -1; + } + + if( prev_extended ) return -1; + const long long pos = remove_eoa ? 
eoa_pos : 0; + if( lseek( fd, pos, SEEK_SET ) != pos ) return -1; + return eoa_pos; + } + + +/* Write 'size' bytes from 'buf' to the archive: straight to goutfd when there is no encoder, else through the lzip encoder. A call with size <= 0 finishes the current lzip member, drains the encoder output and restarts the encoder for a new member; it does nothing if no data was written since the previous flush. Return false if writeblock_wrapper fails. Library errors end in internal_error. */ +bool archive_write( const uint8_t * const buf, const int size ) + { + static bool flushed = true; // avoid flushing empty lzip members + + if( size <= 0 && flushed ) return true; + flushed = ( size <= 0 ); + if( !encoder ) // uncompressed + return writeblock_wrapper( goutfd, buf, size ); + enum { obuf_size = 65536 }; + uint8_t obuf[obuf_size]; + int sz = 0; + if( size <= 0 ) LZ_compress_finish( encoder ); // flush encoder + while( sz < size || size <= 0 ) + { + const int wr = LZ_compress_write( encoder, buf + sz, size - sz ); + if( wr < 0 ) internal_error( "library error (LZ_compress_write)." ); + sz += wr; + if( sz >= size && size > 0 ) break; // minimize dictionary size + const int rd = LZ_compress_read( encoder, obuf, obuf_size ); + if( rd < 0 ) internal_error( "library error (LZ_compress_read)." ); + if( rd == 0 && sz >= size ) break; + if( !writeblock_wrapper( goutfd, obuf, rd ) ) return false; + } + if( LZ_compress_finished( encoder ) == 1 && + LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 ) + internal_error( "library error (LZ_compress_restart_member)." ); + return true; + } + + +/* Format 'extended' into grbuf and write the resulting block to the archive in pieces of at most 1 MiB (1LL << 20). A formatted size of 0 writes nothing. Return false, after reporting it, if formatting fails, or if archive_write fails. */ +bool write_extended( const Extended & extended ) + { + const long long ebsize = extended.format_block( grbuf ); // may be 0 + if( ebsize < 0 ) + { show_error( ( ebsize == -2 ) ? mem_msg2 : eferec_msg ); return false; } + for( long long pos = 0; pos < ebsize; ) // write extended block to archive + { + int size = std::min( ebsize - pos, 1LL << 20 ); + if( !archive_write( grbuf.u8() + pos, size ) ) return false; + pos += size; + } + return true; + } + + +// Return true if it stores filename in the ustar header. 
+bool store_name( const char * const filename, Extended & extended, + Tar_header header, const bool force_extended_name ) + { + const char * const stored_name = + remove_leading_dotslash( filename, &extended.removed_prefix, true ); + + if( !force_extended_name ) // try storing filename in the ustar header + { + const int len = std::strlen( stored_name ); + enum { max_len = prefix_l + 1 + name_l }; // prefix + '/' + name + if( len <= name_l ) // stored_name fits in name + { std::memcpy( header + name_o, stored_name, len ); return true; } + if( len <= max_len ) // find shortest prefix + for( int i = len - name_l - 1; i < len && i <= prefix_l; ++i ) + if( stored_name[i] == '/' ) // stored_name can be split + { + std::memcpy( header + name_o, stored_name + i + 1, len - i - 1 ); + std::memcpy( header + prefix_o, stored_name, i ); + return true; + } + } + // store filename in extended record, leave name zeroed in ustar header + extended.path( stored_name ); + return false; + } + + +// add one tar member to the archive +int add_member( const char * const filename, const struct stat *, + const int flag, struct FTW * ) + { + if( Exclude::excluded( filename ) ) return 0; // skip excluded files + long long file_size; + Extended extended; // metadata for extended records + Tar_header header; + if( !fill_headers( filename, extended, header, file_size, flag ) ) return 0; + print_removed_prefix( extended.removed_prefix ); + const int infd = file_size ? 
open_instream( filename ) : -1; + if( file_size && infd < 0 ) { set_error_status( 1 ); return 0; } + + if( encoder && gcl_opts->solidity == bsolid && + block_is_full( extended.full_size(), file_size, gcl_opts->data_size, + partial_data_size ) && !archive_write( 0, 0 ) ) return 1; + + if( !write_extended( extended ) || !archive_write( header, header_size ) ) + return 1; + if( file_size ) + { + const long long bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = file_size; + while( rest > 0 ) + { + int size = std::min( rest, bufsize ); + const int rd = readblock( infd, buf, size ); + rest -= rd; + if( rd != size ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n", + filename, file_size - rest ); + close( infd ); return 1; + } + if( rest == 0 ) // last read + { + const int rem = file_size % header_size; + if( rem > 0 ) + { const int padding = header_size - rem; + std::memset( buf + size, 0, padding ); size += padding; } + } + if( !archive_write( buf, size ) ) { close( infd ); return 1; } + } + if( close( infd ) != 0 ) + { show_file_error( filename, eclosf_msg, errno ); return 1; } + } + if( encoder && gcl_opts->solidity == no_solid && !archive_write( 0, 0 ) ) + return 1; + if( gcl_opts->warn_newer && archive_attrs.is_newer( filename ) ) + { show_file_error( filename, "File is newer than the archive." ); + set_error_status( 1 ); } + if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename ); + return 0; + } + + +bool check_tty_out( const char * const archive_namep, const int outfd, + const bool to_stdout ) + { + if( isatty( outfd ) ) // for example /dev/tty + { show_file_error( archive_namep, to_stdout ? + "I won't write archive data to a terminal (missing -f option?)" : + "I won't write archive data to a terminal." 
); + return false; } + return true; + } + +} // end namespace + + +// infd and outfd can refer to the same file if copying to a lower file +// position or if source and destination blocks don't overlap. +// max_size < 0 means no size limit. +/* Return true if no read or write error occurred and, when max_size >= 0, exactly max_size bytes were copied. Data is moved through a heap buffer of 65536 bytes that is freed before returning. */ +bool copy_file( const int infd, const int outfd, const long long max_size ) + { + const long long buffer_size = 65536; + // remaining number of bytes to copy + long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size ); + long long copied_size = 0; + uint8_t * const buffer = new uint8_t[buffer_size]; + bool error = false; + + while( rest > 0 ) + { + const int size = std::min( buffer_size, rest ); + if( max_size >= 0 ) rest -= size; + const int rd = readblock( infd, buffer, size ); + if( rd != size && errno ) + { show_error( "Error reading input file", errno ); error = true; break; } + if( rd > 0 ) + { + if( !writeblock_wrapper( outfd, buffer, rd ) ) { error = true; break; } + copied_size += rd; + } + if( rd < size ) break; // EOF + } + delete[] buffer; + return ( !error && ( max_size < 0 || copied_size == max_size ) ); + } + + +/* Write 'size' bytes from 'buffer' to outfd using writeblock. On a short write, report werr_msg for archive_namep and return false. */ +bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, + const int size ) + { + if( writeblock( outfd, buffer, size ) != size ) + { show_file_error( archive_namep, werr_msg, errno ); return false; } + return true; + } + + +// write End-Of-Archive records +/* If 'compressed', write a fixed 44-byte lzip member whose first four bytes spell "LZIP"; else write two zeroed blocks of header_size bytes. Return false on write error. NOTE(review): the member presumably decompresses to 1024 zero bytes (its data-size field reads 0x0400) - confirm against the lzip format. */ +bool write_eoa_records( const int outfd, const bool compressed ) + { + if( compressed ) + { + enum { eoa_member_size = 44 }; + const uint8_t eoa_member[eoa_member_size] = { + 0x4C, 0x5A, 0x49, 0x50, 0x01, 0x0C, 0x00, 0x00, 0x6F, 0xFD, 0xFF, 0xFF, + 0xA3, 0xB7, 0x80, 0x0C, 0x82, 0xDB, 0xFF, 0xFF, 0x9F, 0xF0, 0x00, 0x00, + 0x2E, 0xAF, 0xB5, 0xEF, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return writeblock_wrapper( outfd, eoa_member, eoa_member_size ); + } + enum { bufsize = 2 * header_size }; + uint8_t buf[bufsize]; + std::memset( buf, 0, bufsize ); + return 
writeblock_wrapper( outfd, buf, bufsize ); + } + + +/* Remove any amount of leading "./" and '/' strings from filename. + Optionally also remove prefixes containing a ".." component. + Return the removed prefix in *removed_prefixp. +*/ +const char * remove_leading_dotslash( const char * const filename, + std::string * const removed_prefixp, + const bool dotdot ) + { + const char * p = filename; + + if( dotdot ) + for( int i = 0; filename[i]; ++i ) + if( dotdot_at_i( filename, i ) ) p = filename + i + 2; + while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p; + if( p != filename ) removed_prefixp->assign( filename, p - filename ); + else removed_prefixp->clear(); // no prefix was removed + if( *p == 0 && *filename != 0 ) p = "."; + return p; + } + + +/* If msgp is null, print the message, else return the message in *msgp. + If prefix is already in the list, print nothing or return empty *msgp. + Return true if a message is printed or returned in *msgp. */ +bool print_removed_prefix( const std::string & prefix, + std::string * const msgp ) + { + // prevent two threads from modifying the list of prefixes at the same time + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + static std::vector< std::string > prefixes; // list of prefixes + + if( verbosity < 0 || prefix.empty() ) + { if( msgp ) msgp->clear(); return false; } + xlock( &mutex ); + for( unsigned i = 0; i < prefixes.size(); ++i ) + if( prefixes[i] == prefix ) + { xunlock( &mutex ); if( msgp ) msgp->clear(); return false; } + prefixes.push_back( prefix ); + std::string msg( "Removing leading '" ); msg += prefix; + msg += "' from member names."; + if( msgp ) *msgp = msg; else show_error( msg.c_str() ); + xunlock( &mutex ); // put here to prevent mixing calls to show_error + return true; + } + + +// set file_size != 0 only for regular files +bool fill_headers( const char * const filename, Extended & extended, + Tar_header header, long long & file_size, const int flag ) + { + struct stat st; + if( 
hstat( filename, &st, gcl_opts->dereference ) != 0 ) + { show_file_error( filename, cant_stat, errno ); + set_error_status( 1 ); return false; } + if( archive_attrs.is_the_archive( st ) ) + { show_file_error( archive_namep, "Archive can't contain itself; not dumped." ); + return false; } + init_tar_header( header ); + bool force_extended_name = false; + + const mode_t mode = st.st_mode; + print_octal( header + mode_o, mode_l - 1, + mode & ( S_ISUID | S_ISGID | S_ISVTX | + S_IRWXU | S_IRWXG | S_IRWXO ) ); + const long long uid = ( gcl_opts->uid >= 0 ) ? gcl_opts->uid : st.st_uid; + const long long gid = ( gcl_opts->gid >= 0 ) ? gcl_opts->gid : st.st_gid; + if( uid_in_ustar_range( uid ) ) print_octal( header + uid_o, uid_l - 1, uid ); + else if( extended.set_uid( uid ) ) force_extended_name = true; + if( uid_in_ustar_range( gid ) ) print_octal( header + gid_o, gid_l - 1, gid ); + else if( extended.set_gid( gid ) ) force_extended_name = true; + const long long mtime = gcl_opts->mtime_set ? gcl_opts->mtime : st.st_mtime; + if( time_in_ustar_range( mtime ) ) + print_octal( header + mtime_o, mtime_l - 1, mtime ); + else { extended.set_atime( gcl_opts->mtime_set ? 
mtime : st.st_atime ); + extended.set_mtime( mtime ); force_extended_name = true; } + Typeflag typeflag; + if( S_ISREG( mode ) ) typeflag = tf_regular; + else if( S_ISDIR( mode ) ) + { + typeflag = tf_directory; + if( flag == FTW_DNR ) + { show_file_error( filename, "Can't open directory", errno ); + set_error_status( 1 ); return false; } + } + else if( S_ISLNK( mode ) ) + { + typeflag = tf_symlink; + long len, sz; + if( st.st_size <= linkname_l ) + { + len = sz = readlink( filename, (char *)header + linkname_o, linkname_l ); + while( len > 1 && header[linkname_o+len-1] == '/' ) // trailing '/' + { --len; header[linkname_o+len] = 0; } + } + else + { + char * const buf = new char[st.st_size+1]; + len = sz = readlink( filename, buf, st.st_size ); + if( sz == st.st_size ) + { + while( len > 1 && buf[len-1] == '/' ) --len; // trailing '/' + if( len <= linkname_l ) std::memcpy( header + linkname_o, buf, len ); + else { buf[len] = 0; extended.linkpath( buf ); + force_extended_name = true; } + } + delete[] buf; + } + if( sz != st.st_size ) + { + if( sz < 0 ) + show_file_error( filename, "Error reading symbolic link", errno ); + else + show_file_error( filename, "Wrong size reading symbolic link.\n" + "Please, send a bug report to the maintainers of your filesystem, " + "mentioning\n'wrong st_size of symbolic link'.\nSee " + "http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_stat.h.html" ); + set_error_status( 1 ); return false; + } + } + else if( S_ISCHR( mode ) || S_ISBLK( mode ) ) + { + typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev; + if( (unsigned)major( st.st_rdev ) >= 2 << 20 || + (unsigned)minor( st.st_rdev ) >= 2 << 20 ) + { show_file_error( filename, "devmajor or devminor is larger than 2_097_151." 
); + set_error_status( 1 ); return false; } + print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_rdev ) ); + print_octal( header + devminor_o, devminor_l - 1, minor( st.st_rdev ) ); + } + else if( S_ISFIFO( mode ) ) typeflag = tf_fifo; + else { show_file_error( filename, "Unknown file type." ); + set_error_status( 2 ); return false; } + header[typeflag_o] = typeflag; + + if( uid == (long long)( (uid_t)uid ) ) // get name if uid is in range + { const struct passwd * const pw = getpwuid( uid ); + if( pw && pw->pw_name ) + std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 ); } + + if( gid == (long long)( (gid_t)gid ) ) // get name if gid is in range + { const struct group * const gr = getgrgid( gid ); + if( gr && gr->gr_name ) + std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 ); } + + file_size = ( typeflag == tf_regular && st.st_size > 0 && + st.st_size <= max_file_size ) ? st.st_size : 0; + if( file_size >= 1LL << 33 ) + { extended.file_size( file_size ); force_extended_name = true; } + else print_octal( header + size_o, size_l - 1, file_size ); + store_name( filename, extended, header, force_extended_name ); + print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) ); + return true; + } + + +/* Decide whether the current block must be closed before adding a member of header_size + extended_size + round_up( file_size ) bytes. Return true, restarting partial_data_size at the size of the new member, if the block already holds target_size bytes, or if it holds at least min_data_size bytes and adding half of the new member would exceed target_size. Else add the member size to partial_data_size and return false. */ +bool block_is_full( const long long extended_size, + const unsigned long long file_size, + const unsigned long long target_size, + unsigned long long & partial_data_size ) + { + const unsigned long long member_size = // may overflow 'long long' + header_size + extended_size + round_up( file_size ); + if( partial_data_size >= target_size || + ( partial_data_size >= min_data_size && + partial_data_size + member_size / 2 > target_size ) ) + { partial_data_size = member_size; return true; } + partial_data_size += member_size; return false; + } + + +/* Raise the global error_status to retval if retval is larger; the stored status is never lowered. The update is made while holding a function-local mutex. */ +void set_error_status( const int retval ) + { + // prevent two threads from modifying the error_status at the same time + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; 
+ + xlock( &mutex ); + if( error_status < retval ) error_status = retval; + xunlock( &mutex ); + } + +/* Return retval, except that a zero retval is replaced by error_status when an error was recorded earlier. If show_msg, announce that replacement with show_error. */ +int final_exit_status( int retval, const bool show_msg ) + { + if( retval == 0 && error_status ) + { if( show_msg ) + show_error( "Exiting with failure status due to previous errors." ); + retval = error_status; } + return retval; + } + +/* Return the sum of the header_size bytes of 'header', counting each of the chksum_l bytes of the chksum field as a space (0x20) instead of its stored value. */ +unsigned ustar_chksum( const Tar_header header ) + { + unsigned chksum = chksum_l * 0x20; // treat chksum field as spaces + for( int i = 0; i < chksum_o; ++i ) chksum += header[i]; + for( int i = chksum_o + chksum_l; i < header_size; ++i ) chksum += header[i]; + return chksum; + } + + +/* Return true if 'header' passes verify_ustar_magic and the octal value stored in its chksum field equals ustar_chksum( header ). */ +bool verify_ustar_chksum( const Tar_header header ) + { return ( verify_ustar_magic( header ) && + ustar_chksum( header ) == parse_octal( header + chksum_o, chksum_l ) ); } + + +/* Return true if 'name' ends in ".lz" or ".tlz" and is longer than that extension (a name consisting only of the extension does not match). */ +bool has_lz_ext( const std::string & name ) + { + return ( name.size() > 3 && + name.compare( name.size() - 3, 3, ".lz" ) == 0 ) || + ( name.size() > 4 && + name.compare( name.size() - 4, 4, ".tlz" ) == 0 ); + } + + +int concatenate( const Cl_options & cl_opts ) + { + if( cl_opts.num_files <= 0 ) + { if( verbosity >= 1 ) show_error( "Nothing to concatenate." ); return 0; } + const bool to_stdout = cl_opts.archive_name.empty(); + archive_namep = to_stdout ? "(stdout)" : cl_opts.archive_name.c_str(); + const int outfd = + to_stdout ? 
STDOUT_FILENO : open_outstream( cl_opts.archive_name, false ); + if( outfd < 0 ) return 1; + if( !check_tty_out( archive_namep, outfd, to_stdout ) ) + { close( outfd ); return 1; } + if( !to_stdout && !archive_attrs.init( outfd ) ) + { show_file_error( archive_namep, "Can't stat", errno ); return 1; } + int compressed; // tri-state bool + if( to_stdout ) compressed = -1; // unknown + else + { + compressed = has_lz_ext( cl_opts.archive_name ); // default value + long long pos = check_compressed_appendable( outfd, true ); + if( pos > 0 ) compressed = true; + else if( pos < 0 ) + { + pos = check_uncompressed_appendable( outfd, true ); + if( pos > 0 ) compressed = false; + else if( pos == -2 ) { show_error( mem_msg ); close( outfd ); return 1; } + else if( pos < 0 ) + { show_file_error( archive_namep, compressed ? + "This does not look like an appendable tar.lz archive." : + "This does not look like an appendable tar archive." ); + close( outfd ); return 2; } + } + } + + int retval = 0; + bool eoa_pending = false; + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) // copy archives + { + if( !nonempty_arg( cl_opts.parser, i ) ) continue; // skip opts, empty names + const char * const filename = cl_opts.parser.argument( i ).c_str(); + if( Exclude::excluded( filename ) ) continue; // skip excluded files + const int infd = open_instream( filename ); + if( infd < 0 ) { retval = 1; break; } + struct stat st; + if( !to_stdout && fstat( infd, &st ) == 0 && archive_attrs.is_the_archive( st ) ) + { show_file_error( filename, "Archive can't contain itself; not concatenated." ); + close( infd ); continue; } + long long size; + if( compressed < 0 ) // not initialized yet + { + if( ( size = check_compressed_appendable( infd, false ) ) > 0 ) + compressed = true; + else if( ( size = check_uncompressed_appendable( infd, false ) ) > 0 ) + compressed = false; + else if( size != -2 ) { size = -1 ; compressed = has_lz_ext( filename ); } + } + else size = compressed ? 
check_compressed_appendable( infd, false ) : + check_uncompressed_appendable( infd, false ); + if( size == -2 ) + { show_error( mem_msg ); close( infd ); retval = 1; break; } + if( size < 0 ) + { show_file_error( filename, compressed ? + "Not an appendable tar.lz archive." : + "Not an appendable tar archive." ); + close( infd ); retval = 2; break; } + if( !copy_file( infd, outfd, size ) || close( infd ) != 0 ) + { show_file_error( filename, "Error copying archive", errno ); + eoa_pending = false; retval = 1; break; } + eoa_pending = true; + if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename ); + } + + if( eoa_pending && !write_eoa_records( outfd, compressed ) && retval == 0 ) + retval = 1; + if( close( outfd ) != 0 && retval == 0 ) + { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; } + return retval; + } + + +int encode( const Cl_options & cl_opts ) + { + if( !grbuf.size() ) { show_error( mem_msg ); return 1; } + const bool compressed = ( cl_opts.level >= 0 && cl_opts.level <= 9 ); + const bool to_stdout = cl_opts.archive_name.empty(); + archive_namep = to_stdout ? "(stdout)" : cl_opts.archive_name.c_str(); + gcl_opts = &cl_opts; + + if( !to_stdout && !compressed && has_lz_ext( cl_opts.archive_name ) ) + { show_file_error( archive_namep, + "Uncompressed mode incompatible with .lz extension." ); return 2; } + + const bool append = cl_opts.program_mode == m_append; + if( cl_opts.num_files <= 0 ) + { + if( !append && !to_stdout ) // create archive + { show_error( "Cowardly refusing to create an empty archive.", 0, true ); + return 1; } + else // create/append to stdout or append to archive + { if( verbosity >= 1 ) show_error( "Nothing to append." 
); return 0; } + } + + if( to_stdout ) // create/append to stdout + goutfd = STDOUT_FILENO; + else // create/append to archive + if( ( goutfd = open_outstream( cl_opts.archive_name, !append ) ) < 0 ) + return 1; + if( !check_tty_out( archive_namep, goutfd, to_stdout ) ) + { close( goutfd ); return 1; } + if( append && !to_stdout ) + { + if( compressed && check_compressed_appendable( goutfd, true ) < 0 ) + { show_file_error( archive_namep, + "This does not look like an appendable tar.lz archive." ); + close( goutfd ); return 2; } + if( !compressed ) + { + const long long pos = check_uncompressed_appendable( goutfd, true ); + if( pos == -2 ) { show_error( mem_msg ); close( goutfd ); return 1; } + if( pos < 0 ) { show_file_error( archive_namep, + "This does not look like an appendable tar archive." ); + close( goutfd ); return 2; } + } + } + + if( !archive_attrs.init( goutfd ) ) + { show_file_error( archive_namep, "Can't stat", errno ); + close( goutfd ); return 1; } + + if( compressed ) + { + /* CWD is not per-thread; multi-threaded --create can't be used if a + -C option appears after a relative filename in the command line. */ + if( cl_opts.solidity != asolid && cl_opts.solidity != solid && + cl_opts.num_workers > 0 && + !option_C_after_relative_filename( cl_opts.parser ) ) + { + // show_file_error( archive_namep, "Multi-threaded --create" ); + return encode_lz( cl_opts, archive_namep, goutfd ); + } + encoder = LZ_compress_open( option_mapping[cl_opts.level].dictionary_size, + option_mapping[cl_opts.level].match_len_limit, LLONG_MAX ); + if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) + { + if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) + show_error( mem_msg2 ); + else + internal_error( "invalid argument to encoder." 
); + close( goutfd ); return 1; + } + } + + int retval = 0; + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) // parse command line + { + const int code = cl_opts.parser.code( i ); + const std::string & arg = cl_opts.parser.argument( i ); + const char * filename = arg.c_str(); + if( code == 'C' && chdir( filename ) != 0 ) + { show_file_error( filename, chdir_msg, errno ); retval = 1; break; } + if( code ) continue; // skip options + if( cl_opts.parser.argument( i ).empty() ) continue; // skip empty names + std::string deslashed; // arg without trailing slashes + unsigned len = arg.size(); + while( len > 1 && arg[len-1] == '/' ) --len; + if( len < arg.size() ) + { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); } + if( Exclude::excluded( filename ) ) continue; // skip excluded files + struct stat st; + if( lstat( filename, &st ) != 0 ) // filename from command line + { show_file_error( filename, cant_stat, errno ); set_error_status( 1 ); } + else if( ( retval = nftw( filename, add_member, 16, + cl_opts.dereference ? 0 : FTW_PHYS ) ) != 0 ) + break; // write error + else if( encoder && cl_opts.solidity == dsolid && !archive_write( 0, 0 ) ) + { retval = 1; break; } + } + + if( retval == 0 ) // write End-Of-Archive records + { + enum { bufsize = 2 * header_size }; + uint8_t buf[bufsize]; + std::memset( buf, 0, bufsize ); + if( encoder && + ( cl_opts.solidity == asolid || + ( cl_opts.solidity == bsolid && partial_data_size ) ) && + !archive_write( 0, 0 ) ) retval = 1; // flush encoder + else if( !archive_write( buf, bufsize ) || + ( encoder && !archive_write( 0, 0 ) ) ) retval = 1; + } + if( encoder && LZ_compress_close( encoder ) < 0 ) + { show_error( "LZ_compress_close failed." 
); retval = 1; } + if( close( goutfd ) != 0 && retval == 0 ) + { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; } + return final_exit_status( retval ); + } diff --git a/create.h b/create.h new file mode 100644 index 0000000..cc7c72d --- /dev/null +++ b/create.h @@ -0,0 +1,48 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +class Archive_attrs + { + struct stat ast; // archive attributes at time of init + bool initialized; + bool isreg; + +public: + Archive_attrs() : initialized( false ), isreg( false ) {} + bool init( const int fd ) + { + if( fstat( fd, &ast ) != 0 ) return false; + if( S_ISREG( ast.st_mode ) ) isreg = true; + initialized = true; + return true; + } + bool is_the_archive( const struct stat & st ) const + { return isreg && st.st_dev == ast.st_dev && st.st_ino == ast.st_ino; } + bool is_newer( const struct stat & st ) const + { return initialized && st.st_mtime > ast.st_mtime; } + bool is_newer( const char * const filename ) const + { + if( !initialized ) return false; + struct stat st; + return lstat( filename, &st ) != 0 || st.st_mtime > ast.st_mtime; + } + }; + +extern Archive_attrs archive_attrs; + +const char * const cant_stat = "Can't stat input file"; +const char * const eferec_msg = "Error formatting extended records."; diff --git a/create_lz.cc b/create_lz.cc new file mode 100644 index 0000000..1acaf23 --- /dev/null +++ b/create_lz.cc @@ -0,0 +1,600 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <queue> +#include <pthread.h> +#include <stdint.h> // for lzlib.h +#include <unistd.h> +#include <sys/stat.h> +#include <ftw.h> +#include <lzlib.h> + +#include "tarlz.h" +#include "arg_parser.h" +#include "create.h" + + +namespace { + +const Cl_options * gcl_opts = 0; // local vars needed by add_member_lz +enum { max_packet_size = 1 << 20 }; +class Packet_courier; +Packet_courier * courierp = 0; +unsigned long long partial_data_size = 0; // size of current block + + +class Slot_tally + { + const int num_slots; // total slots + int num_free; // remaining free slots + pthread_mutex_t mutex; + pthread_cond_t slot_av; // slot available + + Slot_tally( const Slot_tally & ); // declared as private + void operator=( const Slot_tally & ); // declared as private + +public: + explicit Slot_tally( const int slots ) + : num_slots( slots ), num_free( slots ) + { xinit_mutex( &mutex ); xinit_cond( &slot_av ); } + + ~Slot_tally() { xdestroy_cond( &slot_av ); xdestroy_mutex( &mutex ); } + + bool all_free() { return ( num_free == num_slots ); } + + void get_slot() // wait for a free slot + { + xlock( &mutex ); + while( num_free <= 0 ) xwait( &slot_av, &mutex ); + --num_free; + xunlock( &mutex ); + } + + void leave_slot() // return a slot to the tally + { + xlock( &mutex ); + if( ++num_free == 1 ) xsignal( &slot_av ); // num_free was 0 + xunlock( &mutex ); + } + }; + + +struct Ipacket // filename, file size and headers + { + const long long file_size; + const std::string filename; // filename.empty() means end of lzip member + const Extended * const extended; + const uint8_t * const header; + + Ipacket() : file_size( 0 ), extended( 0 ), header( 0 ) {} + Ipacket( const char * const name, const long long s, + const Extended * const ext, const uint8_t * const head ) + : file_size( s ), filename( name ), extended( ext ), header( head ) {} + }; + +struct Opacket 
// compressed data to be written to the archive + { + const uint8_t * const data; // data == 0 means end of lzip member + const int size; // number of bytes in data (if any) + + Opacket() : data( 0 ), size( 0 ) {} + Opacket( uint8_t * const d, const int s ) : data( d ), size( s ) {} + }; + + +class Packet_courier // moves packets around + { +public: + unsigned icheck_counter; + unsigned iwait_counter; + unsigned ocheck_counter; + unsigned owait_counter; +private: + int receive_worker_id; // worker queue currently receiving packets + int deliver_worker_id; // worker queue currently delivering packets + Slot_tally slot_tally; // limits the number of input packets + std::vector< std::queue< const Ipacket * > > ipacket_queues; + std::vector< std::queue< const Opacket * > > opacket_queues; + int num_working; // number of workers still running + const int num_workers; // number of workers + const unsigned out_slots; // max output packets per queue + pthread_mutex_t imutex; + pthread_cond_t iav_or_eof; // input packet available or grouper done + pthread_mutex_t omutex; + pthread_cond_t oav_or_exit; // output packet available or all workers exited + std::vector< pthread_cond_t > slot_av; // output slot available + bool eof; // grouper done + + Packet_courier( const Packet_courier & ); // declared as private + void operator=( const Packet_courier & ); // declared as private + +public: + Packet_courier( const int workers, const int in_slots, const int oslots ) + : icheck_counter( 0 ), iwait_counter( 0 ), + ocheck_counter( 0 ), owait_counter( 0 ), + receive_worker_id( 0 ), deliver_worker_id( 0 ), + slot_tally( in_slots ), ipacket_queues( workers ), + opacket_queues( workers ), num_working( workers ), + num_workers( workers ), out_slots( oslots ), slot_av( workers ), + eof( false ) + { + xinit_mutex( &imutex ); xinit_cond( &iav_or_eof ); + xinit_mutex( &omutex ); xinit_cond( &oav_or_exit ); + for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] ); + } + + 
~Packet_courier() + { + for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] ); + xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex ); + xdestroy_cond( &iav_or_eof ); xdestroy_mutex( &imutex ); + } + + /* Receive an ipacket from grouper. + If filename.empty() (end of lzip member token), move to next queue. */ + void receive_packet( const Ipacket * const ipacket ) + { + if( !ipacket->filename.empty() ) + slot_tally.get_slot(); // wait for a free slot + xlock( &imutex ); + ipacket_queues[receive_worker_id].push( ipacket ); + if( ipacket->filename.empty() && ++receive_worker_id >= num_workers ) + receive_worker_id = 0; + xbroadcast( &iav_or_eof ); + xunlock( &imutex ); + } + + // distribute an ipacket to a worker + const Ipacket * distribute_packet( const int worker_id ) + { + const Ipacket * ipacket = 0; + xlock( &imutex ); + ++icheck_counter; + while( ipacket_queues[worker_id].empty() && !eof ) + { + ++iwait_counter; + xwait( &iav_or_eof, &imutex ); + } + if( !ipacket_queues[worker_id].empty() ) + { + ipacket = ipacket_queues[worker_id].front(); + ipacket_queues[worker_id].pop(); + } + xunlock( &imutex ); + if( ipacket ) + { if( !ipacket->filename.empty() ) slot_tally.leave_slot(); } + else + { + // notify muxer when last worker exits + xlock( &omutex ); + if( --num_working == 0 ) xsignal( &oav_or_exit ); + xunlock( &omutex ); + } + return ipacket; + } + + // collect an opacket from a worker + void collect_packet( const Opacket * const opacket, const int worker_id ) + { + xlock( &omutex ); + if( opacket->data ) + { + while( opacket_queues[worker_id].size() >= out_slots ) + xwait( &slot_av[worker_id], &omutex ); + } + opacket_queues[worker_id].push( opacket ); + if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit ); + xunlock( &omutex ); + } + + /* Deliver an opacket to muxer. + If opacket data == 0, move to next queue and wait again. 
*/ + const Opacket * deliver_packet() + { + const Opacket * opacket = 0; + xlock( &omutex ); + ++ocheck_counter; + while( true ) + { + while( opacket_queues[deliver_worker_id].empty() && num_working > 0 ) + { + ++owait_counter; + xwait( &oav_or_exit, &omutex ); + } + if( opacket_queues[deliver_worker_id].empty() ) break; + opacket = opacket_queues[deliver_worker_id].front(); + opacket_queues[deliver_worker_id].pop(); + if( opacket_queues[deliver_worker_id].size() + 1 == out_slots ) + xsignal( &slot_av[deliver_worker_id] ); + if( opacket->data ) break; + if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; + delete opacket; opacket = 0; + } + xunlock( &omutex ); + return opacket; + } + + void finish() // grouper has no more packets to send + { + xlock( &imutex ); + eof = true; + xbroadcast( &iav_or_eof ); + xunlock( &imutex ); + } + + bool finished() // all packets delivered to muxer + { + if( !slot_tally.all_free() || !eof || num_working != 0 ) return false; + for( int i = 0; i < num_workers; ++i ) + if( !ipacket_queues[i].empty() ) return false; + for( int i = 0; i < num_workers; ++i ) + if( !opacket_queues[i].empty() ) return false; + return true; + } + }; + + +// send one ipacket with tar member metadata to courier +int add_member_lz( const char * const filename, const struct stat *, + const int flag, struct FTW * ) + { + if( Exclude::excluded( filename ) ) return 0; // skip excluded files + long long file_size; + // metadata for extended records + Extended * const extended = new( std::nothrow ) Extended; + uint8_t * const header = extended ? 
new( std::nothrow ) Tar_header : 0; + if( !header ) + { show_error( mem_msg ); if( extended ) delete extended; return 1; } + if( !fill_headers( filename, *extended, header, file_size, flag ) ) + { delete[] header; delete extended; return 0; } + print_removed_prefix( extended->removed_prefix ); + + if( gcl_opts->solidity == bsolid && + block_is_full( extended->full_size(), file_size, gcl_opts->data_size, + partial_data_size ) ) + courierp->receive_packet( new Ipacket ); // end of group + + courierp->receive_packet( new Ipacket( filename, file_size, extended, header ) ); + + if( gcl_opts->solidity == no_solid ) // one tar member per group + courierp->receive_packet( new Ipacket ); + if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename ); + return 0; + } + + +struct Grouper_arg + { + const Cl_options * cl_opts; + Packet_courier * courier; + }; + + +/* Package metadata of the files to be archived and pass them to the + courier for distribution to workers. +*/ +extern "C" void * grouper( void * arg ) + { + const Grouper_arg & tmp = *(const Grouper_arg *)arg; + const Cl_options & cl_opts = *tmp.cl_opts; + Packet_courier & courier = *tmp.courier; + + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) // parse command line + { + const int code = cl_opts.parser.code( i ); + const std::string & arg = cl_opts.parser.argument( i ); + const char * filename = arg.c_str(); + if( code == 'C' && chdir( filename ) != 0 ) + { show_file_error( filename, chdir_msg, errno ); exit_fail_mt(); } + if( code ) continue; // skip options + if( cl_opts.parser.argument( i ).empty() ) continue; // skip empty names + std::string deslashed; // arg without trailing slashes + unsigned len = arg.size(); + while( len > 1 && arg[len-1] == '/' ) --len; + if( len < arg.size() ) + { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); } + if( Exclude::excluded( filename ) ) continue; // skip excluded files + struct stat st; + if( lstat( filename, &st ) != 0 ) // filename from command 
line + { show_file_error( filename, cant_stat, errno ); set_error_status( 1 ); } + else if( nftw( filename, add_member_lz, 16, + cl_opts.dereference ? 0 : FTW_PHYS ) != 0 ) + exit_fail_mt(); // write error or OOM + else if( cl_opts.solidity == dsolid ) // end of group + courier.receive_packet( new Ipacket ); + } + + if( cl_opts.solidity == bsolid && partial_data_size ) // finish last block + { partial_data_size = 0; courierp->receive_packet( new Ipacket ); } + courier.finish(); // no more packets to send + return 0; + } + + +/* Writes ibuf to encoder. To minimize dictionary size, it does not read + from encoder until encoder's input buffer is full or finish is true. + Sends opacket to courier and allocates new obuf each time obuf is full. +*/ +void loop_encode( const uint8_t * const ibuf, const int isize, + uint8_t * & obuf, int & opos, Packet_courier & courier, + LZ_Encoder * const encoder, const int worker_id, + const bool finish = false ) + { + int ipos = 0; + if( opos < 0 || opos > max_packet_size ) + internal_error( "bad buffer index in loop_encode." ); + while( true ) + { + if( ipos < isize ) + { + const int wr = LZ_compress_write( encoder, ibuf + ipos, isize - ipos ); + if( wr < 0 ) internal_error( "library error (LZ_compress_write)." ); + ipos += wr; + } + if( ipos >= isize ) // ibuf is empty + { if( finish ) LZ_compress_finish( encoder ); else break; } + const int rd = + LZ_compress_read( encoder, obuf + opos, max_packet_size - opos ); + if( rd < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "LZ_compress_read error: %s\n", + LZ_strerror( LZ_compress_errno( encoder ) ) ); + exit_fail_mt(); + } + opos += rd; + // obuf is full or last opacket in lzip member + if( opos >= max_packet_size || LZ_compress_finished( encoder ) == 1 ) + { + if( opos > max_packet_size ) + internal_error( "opacket size exceeded in worker." 
); + courier.collect_packet( new Opacket( obuf, opos ), worker_id ); + opos = 0; obuf = new( std::nothrow ) uint8_t[max_packet_size]; + if( !obuf ) { show_error( mem_msg2 ); exit_fail_mt(); } + if( LZ_compress_finished( encoder ) == 1 ) + { + if( LZ_compress_restart_member( encoder, LLONG_MAX ) >= 0 ) break; + show_error( "LZ_compress_restart_member failed." ); exit_fail_mt(); + } + } + } + if( ipos > isize ) internal_error( "ipacket size exceeded in worker." ); + if( ipos < isize ) internal_error( "input not fully consumed in worker." ); + } + + +struct Worker_arg + { + Packet_courier * courier; + int dictionary_size; + int match_len_limit; + int worker_id; + }; + + +/* Get ipackets from courier, compress headers and file data, and give the + opackets produced to courier. +*/ +extern "C" void * cworker( void * arg ) + { + const Worker_arg & tmp = *(const Worker_arg *)arg; + Packet_courier & courier = *tmp.courier; + const int dictionary_size = tmp.dictionary_size; + const int match_len_limit = tmp.match_len_limit; + const int worker_id = tmp.worker_id; + + LZ_Encoder * encoder = 0; + uint8_t * data = 0; + Resizable_buffer rbuf; // extended header + data + if( !rbuf.size() ) { show_error( mem_msg2 ); exit_fail_mt(); } + + int opos = 0; + bool flushed = true; // avoid producing empty lzip members + while( true ) + { + const Ipacket * const ipacket = courier.distribute_packet( worker_id ); + if( !ipacket ) break; // no more packets to process + if( ipacket->filename.empty() ) // end of group + { + if( !flushed ) // this lzip member is not empty + loop_encode( 0, 0, data, opos, courier, encoder, worker_id, true ); + courier.collect_packet( new Opacket, worker_id ); // end of member token + flushed = true; delete ipacket; continue; + } + + const char * const filename = ipacket->filename.c_str(); + const int infd = + ipacket->file_size ? 
open_instream( filename ) : -1; + if( ipacket->file_size && infd < 0 ) // can't read file data + { delete[] ipacket->header; delete ipacket->extended; delete ipacket; + set_error_status( 1 ); continue; } // skip file + + flushed = false; + if( !encoder ) // init encoder just before using it + { + data = new( std::nothrow ) uint8_t[max_packet_size]; + encoder = LZ_compress_open( dictionary_size, match_len_limit, LLONG_MAX ); + if( !data || !encoder || LZ_compress_errno( encoder ) != LZ_ok ) + { + if( !data || !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) + show_error( mem_msg2 ); + else + internal_error( "invalid argument to encoder." ); + exit_fail_mt(); + } + } + + if( !ipacket->extended->empty() ) // compress extended block + { + const long long ebsize = ipacket->extended->format_block( rbuf ); + if( ebsize < 0 ) + { show_error( ( ebsize == -2 ) ? mem_msg2 : eferec_msg ); exit_fail_mt(); } + /* Limit the size of the extended block to INT_MAX - 1 so that it can + be fed to lzlib as one buffer. */ + if( ebsize >= INT_MAX ) + { show_error( "Extended records size >= INT_MAX." 
); exit_fail_mt(); } + loop_encode( rbuf.u8(), ebsize, data, opos, courier, encoder, worker_id ); + } + // compress ustar header + loop_encode( ipacket->header, header_size, data, opos, courier, + encoder, worker_id ); + delete[] ipacket->header; delete ipacket->extended; + + if( ipacket->file_size ) + { + const long long bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = ipacket->file_size; + while( rest > 0 ) + { + int size = std::min( rest, bufsize ); + const int rd = readblock( infd, buf, size ); + rest -= rd; + if( rd != size ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n", + filename, ipacket->file_size - rest ); + close( infd ); exit_fail_mt(); + } + if( rest == 0 ) // last read + { + const int rem = ipacket->file_size % header_size; + if( rem > 0 ) + { const int padding = header_size - rem; + std::memset( buf + size, 0, padding ); size += padding; } + } + // compress size bytes of file + loop_encode( buf, size, data, opos, courier, encoder, worker_id ); + } + if( close( infd ) != 0 ) + { show_file_error( filename, eclosf_msg, errno ); exit_fail_mt(); } + } + if( gcl_opts->warn_newer && archive_attrs.is_newer( filename ) ) + { show_file_error( filename, "File is newer than the archive." ); + set_error_status( 1 ); } + delete ipacket; + } + if( data ) delete[] data; + if( encoder && LZ_compress_close( encoder ) < 0 ) + { show_error( "LZ_compress_close failed." ); exit_fail_mt(); } + return 0; + } + + +/* Get from courier the processed and sorted packets, and write + their contents to the output archive. +*/ +void muxer( Packet_courier & courier, const int outfd ) + { + while( true ) + { + const Opacket * const opacket = courier.deliver_packet(); + if( !opacket ) break; // queue is empty. 
all workers exited + + if( !writeblock_wrapper( outfd, opacket->data, opacket->size ) ) + exit_fail_mt(); + delete[] opacket->data; + delete opacket; + } + } + +} // end namespace + + +// init the courier, then start the grouper and the workers and call the muxer +int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, + const int outfd ) + { + const int in_slots = 65536; // max small files (<=512B) in 64 MiB + const int num_workers = cl_opts.num_workers; + const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ? + num_workers * in_slots : INT_MAX; + const int dictionary_size = option_mapping[cl_opts.level].dictionary_size; + const int match_len_limit = option_mapping[cl_opts.level].match_len_limit; + gcl_opts = &cl_opts; + + /* If an error happens after any threads have been started, exit must be + called before courier goes out of scope. */ + Packet_courier courier( num_workers, total_in_slots, cl_opts.out_slots ); + courierp = &courier; // needed by add_member_lz + + Grouper_arg grouper_arg; + grouper_arg.cl_opts = &cl_opts; + grouper_arg.courier = &courier; + + pthread_t grouper_thread; + int errcode = pthread_create( &grouper_thread, 0, grouper, &grouper_arg ); + if( errcode ) + { show_error( "Can't create grouper thread", errcode ); return 1; } + + Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers]; + pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers]; + if( !worker_args || !worker_threads ) + { show_error( mem_msg ); exit_fail_mt(); } + for( int i = 0; i < num_workers; ++i ) + { + worker_args[i].courier = &courier; + worker_args[i].dictionary_size = dictionary_size; + worker_args[i].match_len_limit = match_len_limit; + worker_args[i].worker_id = i; + errcode = pthread_create( &worker_threads[i], 0, cworker, &worker_args[i] ); + if( errcode ) + { show_error( "Can't create worker threads", errcode ); exit_fail_mt(); } + } + + muxer( courier, outfd ); + + for( int i = num_workers - 1; i >= 
0; --i ) + { + errcode = pthread_join( worker_threads[i], 0 ); + if( errcode ) + { show_error( "Can't join worker threads", errcode ); exit_fail_mt(); } + } + delete[] worker_threads; + delete[] worker_args; + + errcode = pthread_join( grouper_thread, 0 ); + if( errcode ) + { show_error( "Can't join grouper thread", errcode ); exit_fail_mt(); } + + // write End-Of-Archive records + int retval = !write_eoa_records( outfd, true ); + + if( close( outfd ) != 0 && retval == 0 ) + { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; } + + if( cl_opts.debug_level & 1 ) + std::fprintf( stderr, + "any worker tried to consume from grouper %8u times\n" + "any worker had to wait %8u times\n" + "muxer tried to consume from workers %8u times\n" + "muxer had to wait %8u times\n", + courier.icheck_counter, + courier.iwait_counter, + courier.ocheck_counter, + courier.owait_counter ); + + if( !courier.finished() ) internal_error( "courier not finished." ); + return final_exit_status( retval ); + } diff --git a/decode.cc b/decode.cc new file mode 100644 index 0000000..a45a1fd --- /dev/null +++ b/decode.cc @@ -0,0 +1,492 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cctype> +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <stdint.h> // for lzlib.h +#include <unistd.h> +#include <utime.h> +#include <sys/stat.h> +#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ + !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ +#include <sys/sysmacros.h> // for major, minor, makedev +#endif +#include <lzlib.h> + +#include "tarlz.h" +#include "arg_parser.h" +#include "lzip_index.h" +#include "archive_reader.h" +#include "decode.h" + + +namespace { + +Resizable_buffer grbuf; + +bool skip_warn( const bool reset = false ) // avoid duplicate warnings + { + static bool skipping = false; + + if( reset ) skipping = false; + else if( !skipping ) + { skipping = true; show_error( "Skipping to next header." ); return true; } + return false; + } + + +void read_error( const Archive_reader & ar ) + { + show_file_error( ar.ad.namep, ar.e_msg(), ar.e_code() ); + if( ar.e_skip() ) skip_warn(); + } + + +int skip_member( Archive_reader & ar, const Extended & extended, + const Typeflag typeflag ) + { + if( data_may_follow( typeflag ) ) + { const int ret = ar.skip_member( extended ); + if( ret != 0 ) { read_error( ar ); if( ar.fatal() ) return ret; } } + return 0; + } + + +int compare_member( const Cl_options & cl_opts, Archive_reader & ar, + const Extended & extended, const Tar_header header ) + { + if( !show_member_name( extended, header, 1, grbuf ) ) return 1; + std::string estr, ostr; + const bool stat_differs = + !compare_file_type( estr, ostr, cl_opts, extended, header ); + if( estr.size() ) std::fputs( estr.c_str(), stderr ); + if( ostr.size() ) { std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); } + if( extended.file_size() <= 0 ) return 0; + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || stat_differs ) + return skip_member( ar, extended, typeflag 
); + // else compare file contents + const char * const filename = extended.path().c_str(); + const int infd2 = open_instream( filename ); + if( infd2 < 0 ) + { set_error_status( 1 ); return skip_member( ar, extended, typeflag ); } + int retval = compare_file_contents( estr, ostr, ar, extended.file_size(), + filename, infd2 ); + if( retval ) { read_error( ar ); if( !ar.fatal() ) retval = 0; } + else { if( estr.size() ) std::fputs( estr.c_str(), stderr ); + if( ostr.size() ) + { std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); } } + return retval; + } + + +int list_member( Archive_reader & ar, + const Extended & extended, const Tar_header header ) + { + if( !show_member_name( extended, header, 0, grbuf ) ) return 1; + return skip_member( ar, extended, (Typeflag)header[typeflag_o] ); + } + + +int extract_member( const Cl_options & cl_opts, Archive_reader & ar, + const Extended & extended, const Tar_header header ) + { + const char * const filename = extended.path().c_str(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( contains_dotdot( filename ) ) + { + show_file_error( filename, dotdot_msg ); + return skip_member( ar, extended, typeflag ); + } + mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits + if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask(); + int outfd = -1; + + if( !show_member_name( extended, header, 1, grbuf ) ) return 1; + // remove file (or empty dir) before extraction to prevent following links + std::remove( filename ); + if( !make_path( filename ) ) + { + show_file_error( filename, intdir_msg, errno ); + set_error_status( 1 ); + return skip_member( ar, extended, typeflag ); + } + + switch( typeflag ) + { + case tf_regular: + case tf_hiperf: + outfd = open_outstream( filename ); + if( outfd < 0 ) + { set_error_status( 1 ); return skip_member( ar, extended, typeflag ); } + break; + case tf_link: + case tf_symlink: + { + const char * const linkname = extended.linkpath().c_str(); + const 
/* Extract one archive member to the file system (serial decoder).
   Creates the object named by extended.path() according to the typeflag
   stored in 'header' (regular file, hard/symbolic link, directory, device
   node or FIFO), then tries to set owner, mode and times, and copies any
   member data from 'ar' into the new file.
   Non-fatal problems are reported and recorded with set_error_status; the
   member data is then skipped so that decoding can continue.
   Returns 0 to continue, 1 if the name can't be shown or the output file
   can't be written/closed, the result of ar.read if the reader reports a
   fatal error, or the result of skip_member when the member is skipped. */
int extract_member( const Cl_options & cl_opts, Archive_reader & ar,
                    const Extended & extended, const Tar_header header )
  {
  const char * const filename = extended.path().c_str();
  const Typeflag typeflag = (Typeflag)header[typeflag_o];
  // refuse names flagged by contains_dotdot; the member is skipped instead
  if( contains_dotdot( filename ) )
    {
    show_file_error( filename, dotdot_msg );
    return skip_member( ar, extended, typeflag );
    }
  mode_t mode = parse_octal( header + mode_o, mode_l );	 // 12 bits
  // apply the umask unless running as root or permissions must be preserved
  if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask();
  int outfd = -1;		// stays -1 for members without an output file

  if( !show_member_name( extended, header, 1, grbuf ) ) return 1;
  // remove file (or empty dir) before extraction to prevent following links
  std::remove( filename );
  if( !make_path( filename ) )
    {
    show_file_error( filename, intdir_msg, errno );
    set_error_status( 1 );
    return skip_member( ar, extended, typeflag );
    }

  // create the file system object that corresponds to the member type
  switch( typeflag )
    {
    case tf_regular:
    case tf_hiperf:
      outfd = open_outstream( filename );
      if( outfd < 0 )
        { set_error_status( 1 ); return skip_member( ar, extended, typeflag ); }
      break;
    case tf_link:
    case tf_symlink:
      {
      const char * const linkname = extended.linkpath().c_str();
      const bool hard = typeflag == tf_link;
      if( ( hard && link( linkname, filename ) != 0 ) ||
          ( !hard && symlink( linkname, filename ) != 0 ) )
        {
        // cantln_msg takes "" or "sym" to print "link" or "symlink"
        print_error( errno, cantln_msg, hard ? "" : "sym", linkname, filename );
        set_error_status( 1 );
        }
      } break;
    case tf_directory:
      // an already existing directory is not an error
      if( mkdir( filename, mode ) != 0 && errno != EEXIST )
        {
        show_file_error( filename, mkdir_msg, errno );
        set_error_status( 1 );
        }
      break;
    case tf_chardev:
    case tf_blockdev:
      {
      // device numbers are stored as octal fields in the header
      const unsigned dev =
        makedev( parse_octal( header + devmajor_o, devmajor_l ),
                 parse_octal( header + devminor_o, devminor_l ) );
      const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
      if( mknod( filename, dmode, dev ) != 0 )
        {
        show_file_error( filename, mknod_msg, errno );
        set_error_status( 1 );
        }
      break;
      }
    case tf_fifo:
      if( mkfifo( filename, mode ) != 0 )
        {
        show_file_error( filename, mkfifo_msg, errno );
        set_error_status( 1 );
        }
      break;
    default:
      print_error( 0, uftype_msg, filename, typeflag );
      set_error_status( 2 );
      return skip_member( ar, extended, typeflag );
    }

  const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
  // errno is cleared so that it is 0 below if chown is never called
  errno = 0;
  if( !islink &&
      ( !uid_gid_in_range( extended.get_uid(), extended.get_gid() ) ||
        chown( filename, extended.get_uid(), extended.get_gid() ) != 0 ) )
    {
    // owner could not be set: don't leave set-id/sticky bits on the new file
    if( outfd >= 0 ) mode &= ~( S_ISUID | S_ISGID | S_ISVTX );
    // chown will in many cases return with EPERM, which can be safely ignored.
    if( errno != EPERM && errno != EINVAL )
      { show_file_error( filename, chown_msg, errno ); set_error_status( 1 ); }
    }

  if( outfd >= 0 ) fchmod( outfd, mode );		// ignore errors

  if( data_may_follow( typeflag ) )
    {
    const int bufsize = 32 * header_size;
    uint8_t buf[bufsize];
    long long rest = extended.file_size();	// file bytes still to be copied
    const int rem = rest % header_size;
    // bytes read after the data to complete the last header_size block
    const int padding = rem ? header_size - rem : 0;
    while( rest > 0 )
      {
      // the last read also consumes the padding; only 'wsize' bytes are written
      const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
      const int ret = ar.read( buf, rsize );
      if( ret != 0 )
        {
        read_error( ar );
        if( outfd >= 0 )
          {
          // keep what was read of a damaged file, or remove the partial file
          if( cl_opts.keep_damaged )
            { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) );
              close( outfd ); }
          else { close( outfd ); std::remove( filename ); }
          }
        if( ar.fatal() ) return ret; else return 0;
        }
      const int wsize = ( rest >= bufsize ) ? bufsize : rest;
      if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize )
        { show_file_error( filename, werr_msg, errno ); return 1; }
      rest -= wsize;
      }
    }
  if( outfd >= 0 && close( outfd ) != 0 )
    { show_file_error( filename, eclosf_msg, errno ); return 1; }
  if( !islink )
    {
    // set access and modification times from the member metadata
    struct utimbuf t;
    t.actime = extended.atime().sec();
    t.modtime = extended.mtime().sec();
    utime( filename, &t );			// ignore errors
    }
  return 0;
  }
bufsize : rest + padding; + const int ret = ar.read( buf, rsize ); + if( ret != 0 ) + { + read_error( ar ); + if( outfd >= 0 ) + { + if( cl_opts.keep_damaged ) + { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) ); + close( outfd ); } + else { close( outfd ); std::remove( filename ); } + } + if( ar.fatal() ) return ret; else return 0; + } + const int wsize = ( rest >= bufsize ) ? bufsize : rest; + if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) + { show_file_error( filename, werr_msg, errno ); return 1; } + rest -= wsize; + } + } + if( outfd >= 0 && close( outfd ) != 0 ) + { show_file_error( filename, eclosf_msg, errno ); return 1; } + if( !islink ) + { + struct utimbuf t; + t.actime = extended.atime().sec(); + t.modtime = extended.mtime().sec(); + utime( filename, &t ); // ignore errors + } + return 0; + } + + +void format_file_diff( std::string & ostr, const char * const filename, + const char * const msg ) + { if( verbosity >= 0 ) + { ostr += filename; ostr += ": "; ostr += msg; ostr += '\n'; } } + +} // end namespace + + +bool compare_file_type( std::string & estr, std::string & ostr, + const Cl_options & cl_opts, + const Extended & extended, const Tar_header header ) + { + const char * const filename = extended.path().c_str(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + struct stat st; + bool diff = false, size_differs = false, type_differs = true; + if( hstat( filename, &st, cl_opts.dereference ) != 0 ) + format_file_error( estr, filename, "warning: Can't stat", errno ); + else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && + !S_ISREG( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a regular file" ); + else if( typeflag == tf_symlink && !S_ISLNK( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a symlink" ); + else if( typeflag == tf_chardev && !S_ISCHR( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a character device" ); + else if( typeflag == tf_blockdev && 
/* Compare the metadata of an archive member with the file of the same name
   in the file system.
   A stat failure is appended to 'estr'; every difference found is appended
   to 'ostr' through format_file_diff.
   Calls set_error_status( 1 ) if the file can't be examined or anything
   differs.
   Returns false if the file could not be examined, its type differs, or
   (for regular files) its size differs; else returns true. */
bool compare_file_type( std::string & estr, std::string & ostr,
                        const Cl_options & cl_opts,
                        const Extended & extended, const Tar_header header )
  {
  const char * const filename = extended.path().c_str();
  const Typeflag typeflag = (Typeflag)header[typeflag_o];
  struct stat st;
  // type_differs starts as true and is cleared only if the type check passes
  bool diff = false, size_differs = false, type_differs = true;
  if( hstat( filename, &st, cl_opts.dereference ) != 0 )
    format_file_error( estr, filename, "warning: Can't stat", errno );
  else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
           !S_ISREG( st.st_mode ) )
    format_file_diff( ostr, filename, "Is not a regular file" );
  else if( typeflag == tf_symlink && !S_ISLNK( st.st_mode ) )
    format_file_diff( ostr, filename, "Is not a symlink" );
  else if( typeflag == tf_chardev && !S_ISCHR( st.st_mode ) )
    format_file_diff( ostr, filename, "Is not a character device" );
  else if( typeflag == tf_blockdev && !S_ISBLK( st.st_mode ) )
    format_file_diff( ostr, filename, "Is not a block device" );
  else if( typeflag == tf_directory && !S_ISDIR( st.st_mode ) )
    format_file_diff( ostr, filename, "Is not a directory" );
  else if( typeflag == tf_fifo && !S_ISFIFO( st.st_mode ) )
    format_file_diff( ostr, filename, "Is not a FIFO" );
  else
    {
    type_differs = false;
    if( typeflag != tf_symlink )	// mode is not compared for symlinks
      {
      const mode_t mode = parse_octal( header + mode_o, mode_l );	// 12 bits
      if( mode != ( st.st_mode & ( S_ISUID | S_ISGID | S_ISVTX |
                                   S_IRWXU | S_IRWXG | S_IRWXO ) ) )
        { format_file_diff( ostr, filename, "Mode differs" ); diff = true; }
      }
    if( !cl_opts.ignore_ids )
      {
      if( extended.get_uid() != (long long)st.st_uid )
        { format_file_diff( ostr, filename, "Uid differs" ); diff = true; }
      if( extended.get_gid() != (long long)st.st_gid )
        { format_file_diff( ostr, filename, "Gid differs" ); diff = true; }
      }
    if( typeflag != tf_symlink )
      {
      // modification time is not compared for directories
      if( typeflag != tf_directory &&
          extended.mtime().sec() != (long long)st.st_mtime )
        {
        // values that become equal once converted to time_t are reported
        // as an overflow, which ignore_overflow can silence
        if( (time_t)extended.mtime().sec() == st.st_mtime )
          { if( !cl_opts.ignore_overflow ) { diff = true;
              format_file_diff( ostr, filename, "Mod time overflow" ); } }
        else { diff = true;
               format_file_diff( ostr, filename, "Mod time differs" ); }
        }
      if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
          extended.file_size() != st.st_size )		// don't compare contents
        { format_file_diff( ostr, filename, "Size differs" ); size_differs = true; }
      if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) &&
          ( parse_octal( header + devmajor_o, devmajor_l ) !=
            (unsigned)major( st.st_rdev ) ||
            parse_octal( header + devminor_o, devminor_l ) !=
            (unsigned)minor( st.st_rdev ) ) )
        { format_file_diff( ostr, filename, "Device number differs" ); diff = true; }
      }
    else
      {
      // symlink: compare the link target with the linkpath of the member
      char * const buf = new char[st.st_size+1];
      long len = readlink( filename, buf, st.st_size );
      bool e = ( len != st.st_size );	// error or unexpected target length
      if( !e )
        {
        while( len > 1 && buf[len-1] == '/' ) --len;	// trailing '/'
        buf[len] = 0;
        if( extended.linkpath() != buf ) e = true;
        }
      delete[] buf;
      if( e ) { format_file_diff( ostr, filename, "Symlink differs" ); diff = true; }
      }
    }
  if( diff || size_differs || type_differs ) set_error_status( 1 );
  return !( size_differs || type_differs );
  }
1 && buf[len-1] == '/' ) --len; // trailing '/' + buf[len] = 0; + if( extended.linkpath() != buf ) e = true; + } + delete[] buf; + if( e ) { format_file_diff( ostr, filename, "Symlink differs" ); diff = true; } + } + } + if( diff || size_differs || type_differs ) set_error_status( 1 ); + return !( size_differs || type_differs ); + } + + +bool compare_file_contents( std::string & estr, std::string & ostr, + Archive_reader_base & ar, const long long file_size, + const char * const filename, const int infd2 ) + { + long long rest = file_size; + const int rem = rest % header_size; + const int padding = rem ? header_size - rem : 0; + const int bufsize = 32 * header_size; + uint8_t buf1[bufsize]; + uint8_t buf2[bufsize]; + int retval = 0; + bool diff = false; + estr.clear(); ostr.clear(); + while( rest > 0 ) + { + const int rsize1 = ( rest >= bufsize ) ? bufsize : rest + padding; + const int rsize2 = ( rest >= bufsize ) ? bufsize : rest; + if( ( retval = ar.read( buf1, rsize1 ) ) != 0 ) { diff = true; break; } + if( !diff ) + { + const int rd = readblock( infd2, buf2, rsize2 ); + if( rd != rsize2 ) + { + if( errno ) format_file_error( estr, filename, "Read error", errno ); + else format_file_diff( ostr, filename, "EOF found in file" ); + diff = true; + } + else + { + int i = 0; while( i < rsize2 && buf1[i] == buf2[i] ) ++i; + if( i < rsize2 ) + { format_file_diff( ostr, filename, "Contents differ" ); diff = true; } + } + } + if( rest < bufsize ) break; + rest -= rsize1; + } + close( infd2 ); + if( diff ) set_error_status( 1 ); + return retval; + } + + +int decode( const Cl_options & cl_opts ) + { + if( !grbuf.size() ) { show_error( mem_msg ); return 1; } + // open archive before changing working directory + const Archive_descriptor ad( cl_opts.archive_name ); + if( ad.infd < 0 ) return 1; + + // Execute -C options and mark filenames to be compared, extracted or listed. + // name_pending is of type char instead of bool to allow concurrent update. 
/* Compare, extract or list the members of the archive named by
   cl_opts.archive_name, depending on cl_opts.program_mode.
   The work is handed to decode_lz when workers are available and the
   archive is indexed with at least 2 lzip members; else the archive is
   read serially, one tar header per loop iteration.
   Returns the value produced by final_exit_status, or 1 on an early
   failure (no memory for grbuf, archive can't be opened, chdir fails). */
int decode( const Cl_options & cl_opts )
  {
  if( !grbuf.size() ) { show_error( mem_msg ); return 1; }
  // open archive before changing working directory
  const Archive_descriptor ad( cl_opts.archive_name );
  if( ad.infd < 0 ) return 1;

  // Execute -C options and mark filenames to be compared, extracted or listed.
  // name_pending is of type char instead of bool to allow concurrent update.
  std::vector< char > name_pending( cl_opts.parser.arguments(), false );
  for( int i = 0; i < cl_opts.parser.arguments(); ++i )
    {
    const int code = cl_opts.parser.code( i );
    if( code == 'C' && cl_opts.program_mode != m_list )
      {
      const char * const dir = cl_opts.parser.argument( i ).c_str();
      if( chdir( dir ) != 0 )
        { show_file_error( dir, chdir_msg, errno ); return 1; }
      }
    // arguments with code 0 that are non-empty and not excluded are names
    if( !code && cl_opts.parser.argument( i ).size() &&
        !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) )
      name_pending[i] = true;
    }

  // multi-threaded --list is faster even with 1 thread and 1 file in archive
  // but multi-threaded --diff and --extract probably need at least 2 of each
  if( ( cl_opts.program_mode == m_diff || cl_opts.program_mode == m_list ||
        cl_opts.program_mode == m_extract ) && cl_opts.num_workers > 0 &&
      ad.indexed && ad.lzip_index.members() >= 2 )	// one file + EOA
    {
    // show_file_error( ad.namep, "Is compressed seekable" );
    return decode_lz( cl_opts, ad, name_pending );
    }

  Archive_reader ar( ad );			// serial reader
  Extended extended;				// metadata from extended records
  int retval = 0;
  bool prev_extended = false;			// prev header was extended
  while( true )					// process one tar header per iteration
    {
    Tar_header header;
    const int ret = ar.read( header, header_size );
    if( ret != 0 ) { read_error( ar ); if( ar.fatal() ) { retval = ret; break; } }
    if( ret != 0 || !verify_ustar_chksum( header ) )	// error or EOA
      {
      if( ret == 0 && block_is_zero( header, header_size ) )	// EOA
        {
        // an extended header followed by EOA is an error unless permissive
        if( !prev_extended || cl_opts.permissive ) break;
        show_file_error( ad.namep, fv_msg1 );
        retval = 2; break;
        }
      // bad header: record the error and try the next block
      if( skip_warn() && verbosity >= 2 )
        std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) );
      set_error_status( 2 ); continue;
      }
    skip_warn( true );				// reset warning

    const Typeflag typeflag = (Typeflag)header[typeflag_o];
    if( typeflag == tf_global )
      {
      if( prev_extended && !cl_opts.permissive )
        { show_file_error( ad.namep, fv_msg2 ); retval = 2; break; }
      Extended dummy;		// global headers are parsed and ignored
      const int ret = ar.parse_records( dummy, header, grbuf, gblrec_msg, true );
      if( ret != 0 )
        { show_file_error( ad.namep, ar.e_msg(), ar.e_code() );
          if( ar.fatal() ) { retval = ret; break; }
          skip_warn(); set_error_status( ret ); }
      continue;
      }
    if( typeflag == tf_extended )
      {
      if( prev_extended && !cl_opts.permissive )
        { show_file_error( ad.namep, fv_msg3 ); retval = 2; break; }
      const int ret = ar.parse_records( extended, header, grbuf, extrec_msg,
                                        cl_opts.permissive );
      if( ret != 0 )
        { show_file_error( ad.namep, ar.e_msg(), ar.e_code() );
          if( ar.fatal() ) { retval = ret; break; }
          skip_warn(); extended.reset(); set_error_status( ret ); }
      else if( !extended.crc_present() && cl_opts.missing_crc )
        { show_file_error( ad.namep, miscrc_msg ); retval = 2; break; }
      // the next header is expected to be the ustar header of this member
      prev_extended = true; continue;
      }
    prev_extended = false;

    extended.fill_from_ustar( header );		// copy metadata from header

    // members without name are skipped except when listing
    if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) )
      retval = skip_member( ar, extended, typeflag );
    else
      {
      print_removed_prefix( extended.removed_prefix );
      if( cl_opts.program_mode == m_list )
        retval = list_member( ar, extended, header );
      else if( extended.path().empty() )
        retval = skip_member( ar, extended, typeflag );
      else if( cl_opts.program_mode == m_diff )
        retval = compare_member( cl_opts, ar, extended, header );
      else retval = extract_member( cl_opts, ar, extended, header );
      }
    extended.reset();				// metadata applies to one member only
    if( retval )
      { show_error( "Error is not recoverable: exiting now." ); break; }
    }

  if( close( ad.infd ) != 0 && retval == 0 )
    { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; }

  // report the names still marked as pending once the archive has been read
  if( retval == 0 )
    for( int i = 0; i < cl_opts.parser.arguments(); ++i )
      if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] )
        { show_file_error( cl_opts.parser.argument( i ).c_str(), nfound_msg );
          retval = 1; }
  return final_exit_status( retval, cl_opts.program_mode != m_diff );
  }
); break; } + } + + if( close( ad.infd ) != 0 && retval == 0 ) + { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; } + + if( retval == 0 ) + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] ) + { show_file_error( cl_opts.parser.argument( i ).c_str(), nfound_msg ); + retval = 1; } + return final_exit_status( retval, cl_opts.program_mode != m_diff ); + } diff --git a/decode.h b/decode.h new file mode 100644 index 0000000..45143fd --- /dev/null +++ b/decode.h @@ -0,0 +1,32 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +inline bool data_may_follow( const Typeflag typeflag ) + { return typeflag <= 0 || typeflag >= 7; } + +inline bool uid_gid_in_range( const long long uid, const long long gid ) + { return uid == (long long)( (uid_t)uid ) && + gid == (long long)( (gid_t)gid ); } + +const char * const dotdot_msg = "Contains a '..' 
// Diagnostic messages used when extracting members.
// The first two are used with the member name as file name.
const char * const dotdot_msg = "Contains a '..' component, skipping.";
const char * const intdir_msg = "Failed to create intermediate directory";
// format arguments: "" or "sym", link target, link name
const char * const cantln_msg = "Can't %slink '%s' to '%s'";
const char * const mkdir_msg = "Can't create directory";
const char * const mknod_msg = "Can't create device node";
const char * const mkfifo_msg = "Can't create FIFO file";
// format arguments: file name, typeflag character
const char * const uftype_msg = "%s: Unknown file type '%c', skipping.";
const char * const chown_msg = "Can't change file owner";
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <algorithm> +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <queue> +#include <pthread.h> +#include <stdint.h> // for lzlib.h +#include <unistd.h> +#include <utime.h> +#include <sys/stat.h> +#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ + !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ +#include <sys/sysmacros.h> // for major, minor, makedev +#endif +#include <lzlib.h> + +#include "tarlz.h" +#include "arg_parser.h" +#include "lzip_index.h" +#include "archive_reader.h" +#include "decode.h" + +/* When a problem is detected by any worker: + - the worker requests mastership and returns. + - the courier discards new packets received or collected. + - the other workers return. + - the muxer drains the queue and returns. */ + +namespace { + +const char * const other_msg = "Other worker found an error."; + +/* line is preformatted and newline terminated except for prefix, error. + ok with an empty line is a no-op. 
/* Unit of output passed from a worker to the muxer: either a line with a
   member name and metadata, or a diagnostic/error message. */
struct Packet
  {
  enum Status { ok, member_done, diag, prefix, error1, error2 };

  long member_id;	// lzip member containing the header of this tar member
  std::string line;	// text ready to print, if any
  Status status;	// kind of packet; diagnostics and errors go to stderr
  int errcode;		// error code, meaningful for error packets

  Packet( const long id, const char * const text, const Status st,
          const int code )
    : member_id( id ),
      line( text ),
      status( st ),
      errcode( code )
    {}
  };
+ for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] ); + xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex ); + } + + bool eoa_found() const { return eoa_found_; } + void report_eoa() { eoa_found_ = true; } + + bool mastership_granted() const { return master_worker_id >= 0; } + + bool request_mastership( const long member_id, const int worker_id ) + { + xlock( &omutex ); + if( mastership_granted() ) // already granted + { xunlock( &omutex ); return ( master_worker_id == worker_id ); } + if( error_member_id < 0 || error_member_id > member_id ) + error_member_id = member_id; + while( !mastership_granted() && ( worker_id != deliver_worker_id || + !opacket_queues[deliver_worker_id].empty() ) ) + xwait( &check_master, &omutex ); + if( !mastership_granted() && worker_id == deliver_worker_id && + opacket_queues[deliver_worker_id].empty() ) + { + master_worker_id = worker_id; // grant mastership + for( int i = 0; i < num_workers; ++i ) // delete all packets + while( !opacket_queues[i].empty() ) + opacket_queues[i].pop(); + xbroadcast( &check_master ); + xunlock( &omutex ); + return true; + } + xunlock( &omutex ); + return false; // mastership granted to another worker + } + + void worker_finished() + { + // notify muxer when last worker exits + xlock( &omutex ); + if( --num_working == 0 ) xsignal( &oav_or_exit ); + xunlock( &omutex ); + } + + /* Collect a packet from a worker. + If a packet is rejected, the worker must terminate. 
*/ + bool collect_packet( const long member_id, const int worker_id, + const char * const msg, const Packet::Status status, + const int errcode = 0 ) + { + const Packet * const opacket = new Packet( member_id, msg, status, errcode ); + xlock( &omutex ); + if( ( mastership_granted() && master_worker_id != worker_id ) || + ( error_member_id >= 0 && error_member_id < opacket->member_id ) ) + { xunlock( &omutex ); delete opacket; return false; } // reject packet + while( opacket_queues[worker_id].size() >= out_slots ) + xwait( &slot_av[worker_id], &omutex ); + opacket_queues[worker_id].push( opacket ); + if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit ); + xunlock( &omutex ); + return true; + } + + /* Deliver a packet to muxer. + If packet.status == Packet::member_done, move to next queue. + If packet.line.empty(), wait again (empty lzip member). */ + const Packet * deliver_packet() + { + const Packet * opacket = 0; + xlock( &omutex ); + ++ocheck_counter; + while( true ) + { + while( opacket_queues[deliver_worker_id].empty() && num_working > 0 ) + { + ++owait_counter; + if( !mastership_granted() && error_member_id >= 0 ) + xbroadcast( &check_master ); // mastership requested not yet granted + xwait( &oav_or_exit, &omutex ); + } + if( opacket_queues[deliver_worker_id].empty() ) break; + opacket = opacket_queues[deliver_worker_id].front(); + opacket_queues[deliver_worker_id].pop(); + if( opacket_queues[deliver_worker_id].size() + 1 == out_slots ) + xsignal( &slot_av[deliver_worker_id] ); + if( opacket->status == Packet::member_done && !mastership_granted() ) + { if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; } + if( !opacket->line.empty() ) break; + delete opacket; opacket = 0; + } + xunlock( &omutex ); + return opacket; + } + + bool finished() // all packets delivered to muxer + { + if( num_working != 0 ) return false; + for( int i = 0; i < num_workers; ++i ) + if( !opacket_queues[i].empty() ) return false; + return true; + } + }; + + +// 
prevent two threads from extracting the same file at the same time +class Name_monitor + { + std::vector< unsigned > crc_vector; + std::vector< std::string > name_vector; + pthread_mutex_t mutex; + +public: + Name_monitor( const int num_workers ) + : crc_vector( num_workers ), name_vector( num_workers ) + { if( num_workers > 0 ) xinit_mutex( &mutex ); } + + bool reserve_name( const unsigned worker_id, const std::string & filename ) + { + // compare the CRCs of the names, verify collisions comparing the names + const unsigned crc = + crc32c.compute_crc( (const uint8_t *)filename.c_str(), filename.size() ); + xlock( &mutex ); + for( unsigned i = 0; i < crc_vector.size(); ++i ) + if( crc_vector[i] == crc && crc != 0 && i != worker_id && + name_vector[i] == filename ) + { xunlock( &mutex ); return false; } // filename already reserved + crc_vector[worker_id] = crc; name_vector[worker_id] = filename; + xunlock( &mutex ); + return true; + } + }; + + +struct Trival // triple result value + { + const char * msg; + int errcode; + int retval; + explicit Trival( const char * const s = 0, const int e = 0, const int r = 0 ) + : msg( s ), errcode( e ), retval( r ) {} + }; + + +Trival skip_member_lz( Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const long member_id, + const int worker_id, const Typeflag typeflag ) + { + if( data_may_follow( typeflag ) ) + { const int ret = ar.skip_member( extended ); + if( ret != 0 ) return Trival( ar.e_msg(), ar.e_code(), ret ); } + if( ar.at_member_end() && + !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) + return Trival( other_msg, 0, 1); + return Trival(); + } + + +Trival compare_member_lz( const Cl_options & cl_opts, + Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const long member_id, + const int worker_id ) + { + if( verbosity < 1 ) rbuf()[0] = 0; + else if( !format_member_name( extended, header, 
rbuf, verbosity > 1 ) ) + return Trival( mem_msg, 0, 1 ); + std::string estr, ostr; + const bool stat_differs = + !compare_file_type( estr, ostr, cl_opts, extended, header ); + if( ( rbuf()[0] && !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::ok ) ) || + ( estr.size() && !courier.collect_packet( member_id, worker_id, + estr.c_str(), Packet::diag ) ) || + ( ostr.size() && !courier.collect_packet( member_id, worker_id, + ostr.c_str(), Packet::ok ) ) || + ( extended.file_size() <= 0 && ar.at_member_end() && + !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) ) + return Trival( other_msg, 0, 1 ); + if( extended.file_size() <= 0 ) return Trival(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || stat_differs ) + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); + // else compare file contents + const char * const filename = extended.path().c_str(); + const int infd2 = open_instream( filename ); + if( infd2 < 0 ) { set_error_status( 1 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); } + const int ret = compare_file_contents( estr, ostr, ar, extended.file_size(), + filename, infd2 ); + if( ret != 0 ) return Trival( ar.e_msg(), ar.e_code(), ret ); + if( ( estr.size() && !courier.collect_packet( member_id, worker_id, + estr.c_str(), Packet::diag ) ) || + ( ostr.size() && !courier.collect_packet( member_id, worker_id, + ostr.c_str(), Packet::ok ) ) || + ( ar.at_member_end() && + !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) ) + return Trival( other_msg, 0, 1 ); + return Trival(); + } + + +Trival list_member_lz( Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const long member_id, + const int worker_id ) + { + if( verbosity < 0 ) rbuf()[0] = 0; + else if( !format_member_name( extended, 
header, rbuf, verbosity > 0 ) ) + return Trival( mem_msg, 0, 1 ); + const int ret = data_may_follow( (Typeflag)header[typeflag_o] ) ? + ar.skip_member( extended ) : 0; // print name even on read error + if( !courier.collect_packet( member_id, worker_id, rbuf(), + ar.at_member_end() ? Packet::member_done : Packet::ok ) ) + return Trival( other_msg, 0, 1 ); + if( ret != 0 ) return Trival( ar.e_msg(), ar.e_code(), ret ); + return Trival(); + } + + +Trival extract_member_lz( const Cl_options & cl_opts, + Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const long member_id, + const int worker_id, Name_monitor & name_monitor ) + { + const char * const filename = extended.path().c_str(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( contains_dotdot( filename ) ) + { + if( format_file_error( rbuf, filename, dotdot_msg ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); + } + // skip member if another copy is already being extracted by another thread + if( !name_monitor.reserve_name( worker_id, extended.path() ) ) + { + if( verbosity >= 3 && format_file_error( rbuf, filename, + "Is being extracted by another thread, skipping." 
/* Extract one archive member to the file system (parallel decoder).
   Works like the serial extraction but messages are formatted into 'rbuf'
   and given to the courier as packets instead of being printed, and the
   name is reserved in 'name_monitor' so that two workers don't extract the
   same file at the same time.
   Returns an empty Trival on success or after a non-fatal problem,
   other_msg with retval 1 if the courier rejects a packet, mem_msg if the
   member name can't be formatted, the error of the reader if the archive
   can't be read, the message in 'rbuf' with retval 1 on write/close
   failure, or the result of skip_member_lz when the member is skipped. */
Trival extract_member_lz( const Cl_options & cl_opts,
                          Archive_reader_i & ar, Packet_courier & courier,
                          const Extended & extended, const Tar_header header,
                          Resizable_buffer & rbuf, const long member_id,
                          const int worker_id, Name_monitor & name_monitor )
  {
  const char * const filename = extended.path().c_str();
  const Typeflag typeflag = (Typeflag)header[typeflag_o];
  // refuse names flagged by contains_dotdot; the member is skipped instead
  if( contains_dotdot( filename ) )
    {
    if( format_file_error( rbuf, filename, dotdot_msg ) &&
        !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
      return Trival( other_msg, 0, 1 );
    return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag );
    }
  // skip member if another copy is already being extracted by another thread
  if( !name_monitor.reserve_name( worker_id, extended.path() ) )
    {
    if( verbosity >= 3 && format_file_error( rbuf, filename,
                          "Is being extracted by another thread, skipping." ) &&
        !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
      return Trival( other_msg, 0, 1 );
    return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag );
    }
  mode_t mode = parse_octal( header + mode_o, mode_l );	 // 12 bits
  // apply the umask unless running as root or permissions must be preserved
  if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask();
  int outfd = -1;		// stays -1 for members without an output file

  if( verbosity >= 1 )
    {
    if( !format_member_name( extended, header, rbuf, verbosity > 1 ) )
      return Trival( mem_msg, 0, 1 );
    if( !courier.collect_packet( member_id, worker_id, rbuf(), Packet::ok ) )
      return Trival( other_msg, 0, 1 );
    }
  /* Remove file before extraction to prevent following links.
     Don't remove an empty dir because other thread may need it. */
  if( typeflag != tf_directory ) std::remove( filename );
  if( !make_path( filename ) )
    {
    if( format_file_error( rbuf, filename, intdir_msg, errno ) &&
        !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
      return Trival( other_msg, 0, 1 );
    set_error_status( 1 );
    return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag );
    }

  // create the file system object that corresponds to the member type
  switch( typeflag )
    {
    case tf_regular:
    case tf_hiperf:
      // 'rbuf' is passed to open_outstream and sent as the diagnostic below
      outfd = open_outstream( filename, true, &rbuf );
      if( outfd < 0 )
        {
        if( verbosity >= 0 &&
            !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
          return Trival( other_msg, 0, 1 );
        set_error_status( 1 );
        return skip_member_lz( ar, courier, extended, member_id, worker_id,
                               typeflag );
        }
      break;
    case tf_link:
    case tf_symlink:
      {
      const char * const linkname = extended.linkpath().c_str();
      const bool hard = typeflag == tf_link;
      if( ( hard && link( linkname, filename ) != 0 ) ||
          ( !hard && symlink( linkname, filename ) != 0 ) )
        {
        // cantln_msg takes "" or "sym" to print "link" or "symlink"
        if( format_error( rbuf, errno, cantln_msg, hard ? "" : "sym",
                          linkname, filename ) &&
            !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
          return Trival( other_msg, 0, 1 );
        set_error_status( 1 );
        }
      } break;
    case tf_directory:
      {
      // keep an existing directory; replace anything else of the same name
      struct stat st;
      bool exists = ( stat( filename, &st ) == 0 );
      if( exists && !S_ISDIR( st.st_mode ) )
        { exists = false; std::remove( filename ); }
      if( !exists && mkdir( filename, mode ) != 0 && errno != EEXIST )
        {
        if( format_file_error( rbuf, filename, mkdir_msg, errno ) &&
            !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
          return Trival( other_msg, 0, 1 );
        set_error_status( 1 );
        }
      } break;
    case tf_chardev:
    case tf_blockdev:
      {
      // device numbers are stored as octal fields in the header
      const unsigned dev =
        makedev( parse_octal( header + devmajor_o, devmajor_l ),
                 parse_octal( header + devminor_o, devminor_l ) );
      const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
      if( mknod( filename, dmode, dev ) != 0 )
        {
        if( format_file_error( rbuf, filename, mknod_msg, errno ) &&
            !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
          return Trival( other_msg, 0, 1 );
        set_error_status( 1 );
        }
      break;
      }
    case tf_fifo:
      if( mkfifo( filename, mode ) != 0 )
        {
        if( format_file_error( rbuf, filename, mkfifo_msg, errno ) &&
            !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
          return Trival( other_msg, 0, 1 );
        set_error_status( 1 );
        }
      break;
    default:
      if( format_error( rbuf, 0, uftype_msg, filename, typeflag ) &&
          !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
        return Trival( other_msg, 0, 1 );
      set_error_status( 2 );
      return skip_member_lz( ar, courier, extended, member_id, worker_id,
                             typeflag );
    }

  const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
  // errno is cleared so that it is 0 below if chown is never called
  errno = 0;
  if( !islink &&
      ( !uid_gid_in_range( extended.get_uid(), extended.get_gid() ) ||
        chown( filename, extended.get_uid(), extended.get_gid() ) != 0 ) )
    {
    // owner could not be set: don't leave set-id/sticky bits on the new file
    if( outfd >= 0 ) mode &= ~( S_ISUID | S_ISGID | S_ISVTX );
    // chown will in many cases return with EPERM, which can be safely ignored.
    if( errno != EPERM && errno != EINVAL )
      {
      if( format_file_error( rbuf, filename, chown_msg, errno ) &&
          !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
        return Trival( other_msg, 0, 1 );
      set_error_status( 1 );
      }
    }

  if( outfd >= 0 ) fchmod( outfd, mode );		// ignore errors

  if( data_may_follow( typeflag ) )
    {
    const int bufsize = 32 * header_size;
    uint8_t buf[bufsize];
    long long rest = extended.file_size();	// file bytes still to be copied
    const int rem = rest % header_size;
    // bytes read after the data to complete the last header_size block
    const int padding = rem ? header_size - rem : 0;
    while( rest > 0 )
      {
      // the last read also consumes the padding; only 'wsize' bytes are written
      const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
      const int ret = ar.read( buf, rsize );
      if( ret != 0 )
        {
        if( outfd >= 0 )
          {
          // keep what was read of a damaged file, or remove the partial file
          if( cl_opts.keep_damaged )
            { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) );
              close( outfd ); }
          else { close( outfd ); std::remove( filename ); }
          }
        return Trival( ar.e_msg(), ar.e_code(), ret );
        }
      const int wsize = ( rest >= bufsize ) ? bufsize : rest;
      if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize )
        { format_file_error( rbuf, filename, werr_msg, errno );
          return Trival( rbuf(), 0, 1 ); }
      rest -= wsize;
      }
    }
  if( outfd >= 0 && close( outfd ) != 0 )
    { format_file_error( rbuf, filename, eclosf_msg, errno );
      return Trival( rbuf(), 0, 1 ); }
  if( !islink )
    {
    // set access and modification times from the member metadata
    struct utimbuf t;
    t.actime = extended.atime().sec();
    t.modtime = extended.mtime().sec();
    utime( filename, &t );			// ignore errors
    }
  // tell the courier when this was the last tar member of the lzip member
  if( ar.at_member_end() &&
      !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) )
    return Trival( other_msg, 0, 1 );
  return Trival();
  }
/* Arguments passed to each decoding worker thread (dworker) through its
   void pointer argument. The worker dereferences every pointer. */
struct Worker_arg
  {
  const Cl_options * cl_opts;		// command line options
  const Archive_descriptor * ad;	// archive being decoded
  Packet_courier * courier;		// receives the packets produced
  Name_monitor * name_monitor;		// passed on to extract_member_lz
  std::vector< char > * name_pending;	// names not yet found in the archive
  int worker_id;			// also the first lzip member processed
  int num_workers;			// stride between members of this worker
  };
= ad.lzip_index.dblock( i ).end(); + Extended extended; // metadata from extended records + bool prev_extended = false; // prev header was extended + ar.set_member( i ); // prepare for new member + while( true ) // process one tar header per iteration + { + if( ar.data_pos() >= data_end ) // dblock.end or udata_size + { + if( ar.data_pos() == data_end && !prev_extended ) break; + // member end exceeded or ends in extended, process rest of file + if( !courier.request_mastership( i, worker_id ) ) goto done; + master = true; + if( data_end >= ad.lzip_index.udata_size() ) + { courier.collect_packet( i, worker_id, end_msg, Packet::error2 ); + goto done; } + data_end = ad.lzip_index.udata_size(); + if( ar.data_pos() == data_end && !prev_extended ) break; + } + Tar_header header; + const int ret = ar.read( header, header_size ); + if( ret != 0 ) + { if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( i, worker_id, ar.e_msg(), + ( ret == 1 ) ? Packet::error1 : Packet::error2, ar.e_code() ); + goto done; } + if( !verify_ustar_chksum( header ) ) // error or EOA + { + if( !courier.request_mastership( i, worker_id ) ) goto done; + if( block_is_zero( header, header_size ) ) // EOA + { + if( !prev_extended || cl_opts.permissive ) courier.report_eoa(); + else courier.collect_packet( i, worker_id, fv_msg1, Packet::error2 ); + goto done; + } + courier.collect_packet( i, worker_id, ( ar.data_pos() > header_size ) ? 
+ bad_hdr_msg : posix_lz_msg, Packet::error2 ); + goto done; + } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_global ) + { + const char * msg = 0; int ret = 2; + Extended dummy; // global headers are parsed and ignored + if( prev_extended && !cl_opts.permissive ) msg = fv_msg2; + else if( ( ret = ar.parse_records( dummy, header, rbuf, gblrec_msg, + true ) ) != 0 ) msg = ar.e_msg(); + else + { + if( ar.data_pos() == data_end && // end of lzip member or EOF + !courier.collect_packet( i, worker_id, "", Packet::member_done ) ) + goto done; + continue; + } + if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( i, worker_id, msg, ( ret == 1 ) ? + Packet::error1 : Packet::error2 ); + goto done; + } + if( typeflag == tf_extended ) + { + const char * msg = 0; int ret = 2; + if( prev_extended && !cl_opts.permissive ) msg = fv_msg3; + else if( ( ret = ar.parse_records( extended, header, rbuf, extrec_msg, + cl_opts.permissive ) ) != 0 ) msg = ar.e_msg(); + else if( !extended.crc_present() && cl_opts.missing_crc ) + { msg = miscrc_msg; ret = 2; } + else { prev_extended = true; continue; } + if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( i, worker_id, msg, ( ret == 1 ) ? + Packet::error1 : Packet::error2 ); + goto done; + } + prev_extended = false; + + extended.fill_from_ustar( header ); // copy metadata from header + + /* Skip members with an empty name in the ustar header. If there is an + extended header in a previous lzip member, its worker will request + mastership. Else the ustar-only unnamed member will be ignored. 
*/ + Trival trival; + if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) + trival = skip_member_lz( ar, courier, extended, i, worker_id, typeflag ); + else + { + std::string rpmsg; + if( print_removed_prefix( extended.removed_prefix, &rpmsg ) && + !courier.collect_packet( i, worker_id, rpmsg.c_str(), Packet::prefix ) ) + { trival = Trival( other_msg, 0, 1 ); goto fatal; } + if( cl_opts.program_mode == m_list ) + trival = list_member_lz( ar, courier, extended, header, rbuf, i, worker_id ); + else if( extended.path().empty() ) + trival = skip_member_lz( ar, courier, extended, i, worker_id, typeflag ); + else if( cl_opts.program_mode == m_diff ) + trival = compare_member_lz( cl_opts, ar, courier, extended, header, + rbuf, i, worker_id ); + else trival = extract_member_lz( cl_opts, ar, courier, extended, header, + rbuf, i, worker_id, name_monitor ); + } + if( trival.retval ) // fatal error +fatal: { if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( i, worker_id, trival.msg, + ( trival.retval == 1 ) ? Packet::error1 : Packet::error2, + trival.errcode ); + goto done; } + extended.reset(); + } + } +done: + courier.worker_finished(); + return 0; + } + + +/* Get from courier the processed and sorted packets, and print + the member lines on stdout or the diagnostics and errors on stderr. +*/ +void muxer( const char * const archive_namep, Packet_courier & courier ) + { + int retval = 0; + while( retval == 0 ) + { + const Packet * const opacket = courier.deliver_packet(); + if( !opacket ) break; // queue is empty. all workers exited + + switch( opacket->status ) + { + case Packet::error1: + case Packet::error2: + show_file_error( archive_namep, opacket->line.c_str(), opacket->errcode ); + retval = ( opacket->status == Packet::error1 ) ? 
1 : 2; break; + case Packet::prefix: show_error( opacket->line.c_str() ); break; + case Packet::diag: std::fputs( opacket->line.c_str(), stderr ); break; + default: if( opacket->line.size() ) + { std::fputs( opacket->line.c_str(), stdout ); std::fflush( stdout ); } + } + delete opacket; + } + if( retval == 0 && !courier.eoa_found() ) // no worker found EOA blocks + { show_file_error( archive_namep, end_msg ); retval = 2; } + if( retval ) exit_fail_mt( retval ); + } + +} // end namespace + + +// init the courier, then start the workers and call the muxer. +int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, + std::vector< char > & name_pending ) + { + const int out_slots = 65536; // max small files (<=512B) in 64 MiB + const int num_workers = // limited to number of members + std::min( (long)cl_opts.num_workers, ad.lzip_index.members() ); + if( cl_opts.program_mode == m_extract ) get_umask(); // cache the umask + Name_monitor + name_monitor( ( cl_opts.program_mode == m_extract ) ? num_workers : 0 ); + + /* If an error happens after any threads have been started, exit must be + called before courier goes out of scope. 
*/ + Packet_courier courier( num_workers, out_slots ); + + Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers]; + pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers]; + if( !worker_args || !worker_threads ) { show_error( mem_msg ); return 1; } + for( int i = 0; i < num_workers; ++i ) + { + worker_args[i].cl_opts = &cl_opts; + worker_args[i].ad = &ad; + worker_args[i].courier = &courier; + worker_args[i].name_monitor = &name_monitor; + worker_args[i].name_pending = &name_pending; + worker_args[i].worker_id = i; + worker_args[i].num_workers = num_workers; + const int errcode = + pthread_create( &worker_threads[i], 0, dworker, &worker_args[i] ); + if( errcode ) + { show_error( "Can't create worker threads", errcode ); exit_fail_mt(); } + } + + muxer( ad.namep, courier ); + + for( int i = num_workers - 1; i >= 0; --i ) + { + const int errcode = pthread_join( worker_threads[i], 0 ); + if( errcode ) + { show_error( "Can't join worker threads", errcode ); exit_fail_mt(); } + } + delete[] worker_threads; + delete[] worker_args; + + int retval = 0; + if( close( ad.infd ) != 0 ) + { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; } + + if( retval == 0 ) + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] ) + { show_file_error( cl_opts.parser.argument( i ).c_str(), nfound_msg ); + retval = 1; } + + if( cl_opts.debug_level & 1 ) + std::fprintf( stderr, + "muxer tried to consume from workers %8u times\n" + "muxer had to wait %8u times\n", + courier.ocheck_counter, + courier.owait_counter ); + + if( !courier.finished() ) internal_error( "courier not finished." ); + return final_exit_status( retval, cl_opts.program_mode != m_diff ); + } diff --git a/delete.cc b/delete.cc new file mode 100644 index 0000000..6fc58f5 --- /dev/null +++ b/delete.cc @@ -0,0 +1,190 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cctype> +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <stdint.h> // for lzlib.h +#include <unistd.h> +#include <lzlib.h> + +#include "tarlz.h" +#include "arg_parser.h" +#include "lzip_index.h" +#include "archive_reader.h" + + +bool safe_seek( const int fd, const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) return true; + show_error( seek_msg, errno ); return false; + } + + +int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, + std::vector< char > & name_pending, const long long istream_pos, + const int outfd, int retval ) + { + const long long rest = ad.lzip_index.file_size() - istream_pos; + if( istream_pos > 0 && rest > 0 && + ( !safe_seek( ad.infd, istream_pos ) || + !copy_file( ad.infd, outfd, rest ) ) ) + { show_file_error( ad.namep, "Error during tail copy." ); + return retval ? 
retval : 1; } + const long long ostream_pos = lseek( outfd, 0, SEEK_CUR ); + if( ostream_pos < 0 ) + { show_file_error( ad.namep, seek_msg, errno ); retval = 1; } + else if( ostream_pos > 0 && ostream_pos < ad.lzip_index.file_size() ) + { + int ret; + do ret = ftruncate( outfd, ostream_pos ); + while( ret != 0 && errno == EINTR ); + if( ret != 0 || lseek( outfd, 0, SEEK_END ) != ostream_pos ) + { + show_file_error( ad.namep, "Can't truncate archive", errno ); + if( retval < 1 ) retval = 1; + } + } + + if( ( close( outfd ) | close( ad.infd ) ) != 0 && retval == 0 ) + { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; } + + if( retval == 0 ) + for( int i = 0; i < parser.arguments(); ++i ) + if( nonempty_arg( parser, i ) && name_pending[i] ) + { show_file_error( parser.argument( i ).c_str(), nfound_msg ); + retval = 1; } + return retval; + } + + +/* Deleting from a corrupt archive must not worsen the corruption. Stop and + tail-copy as soon as corruption is found. +*/ +int delete_members( const Cl_options & cl_opts ) + { + if( cl_opts.num_files <= 0 ) + { if( verbosity >= 1 ) show_error( "Nothing to delete." ); return 0; } + if( cl_opts.archive_name.empty() ) + { show_error( "Deleting from stdin not implemented yet." ); return 1; } + const Archive_descriptor ad( cl_opts.archive_name ); + if( ad.infd < 0 ) return 1; + const int outfd = open_outstream( cl_opts.archive_name, false ); + if( outfd < 0 ) { close( ad.infd ); return 1; } + + // mark member names to be deleted + std::vector< char > name_pending( cl_opts.parser.arguments(), false ); + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) && + !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) ) + name_pending[i] = true; + + if( ad.indexed ) // archive is a compressed regular file + return delete_members_lz( cl_opts, ad, name_pending, outfd ); + if( !ad.seekable ) + { show_file_error( ad.namep, "Archive is not seekable." 
); return 1; } + if( ad.lzip_index.file_size() < 3 * header_size ) + { show_file_error( ad.namep, has_lz_ext( ad.name ) ? posix_lz_msg : posix_msg ); + return 2; } + // archive is uncompressed seekable, unless compressed corrupt + + Archive_reader ar( ad ); // serial reader + Resizable_buffer rbuf; + long long istream_pos = 0; // source of next data move + long long member_begin = 0; // first pos of current tar member + Extended extended; // metadata from extended records + int retval = 0; + bool prev_extended = false; // prev header was extended + if( !rbuf.size() ) { show_error( mem_msg ); return 1; } + + while( true ) // process one tar header per iteration + { + if( !prev_extended && ( member_begin = lseek( ad.infd, 0, SEEK_CUR ) ) < 0 ) + { show_file_error( ad.namep, seek_msg, errno ); retval = 1; break; } + Tar_header header; + if( ( retval = ar.read( header, header_size ) ) != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; } + if( !verify_ustar_chksum( header ) ) // error or EOA + { + if( block_is_zero( header, header_size ) ) // EOA + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg1 ); retval = 2; } + break; + } + // posix format already verified by archive reader + show_file_error( ad.namep, bad_hdr_msg ); + retval = 2; break; + } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_global ) + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg2 ); retval = 2; break; } + Extended dummy; // global headers are parsed and ignored + retval = ar.parse_records( dummy, header, rbuf, gblrec_msg, true ); + if( retval ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; } + continue; + } + if( typeflag == tf_extended ) + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg3 ); retval = 2; break; } + if( ( retval = ar.parse_records( extended, header, rbuf, extrec_msg, + cl_opts.permissive ) ) != 0 ) + { 
show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; } + if( !extended.crc_present() && cl_opts.missing_crc ) + { show_file_error( ad.namep, miscrc_msg ); retval = 2; break; } + prev_extended = true; continue; + } + prev_extended = false; + + extended.fill_from_ustar( header ); // copy metadata from header + + if( ( retval = ar.skip_member( extended ) ) != 0 ) + { show_file_error( ad.namep, seek_msg, errno ); break; } + + // delete tar member + if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) + { + print_removed_prefix( extended.removed_prefix ); + if( !show_member_name( extended, header, 1, rbuf ) ) + { retval = 1; break; } + const long long pos = lseek( ad.infd, 0, SEEK_CUR ); + if( pos <= 0 || pos <= member_begin || member_begin < istream_pos ) + { show_file_error( ad.namep, seek_msg, errno ); retval = 1; break; } + const long long size = member_begin - istream_pos; + if( size > 0 ) // move pending data each time a member is deleted + { + if( istream_pos == 0 ) + { if( !safe_seek( outfd, size ) ) { retval = 1; break; } } + else if( !safe_seek( ad.infd, istream_pos ) || + !copy_file( ad.infd, outfd, size ) || + !safe_seek( ad.infd, pos ) ) { retval = 1; break; } + } + istream_pos = pos; + } + extended.reset(); + } + + return tail_copy( cl_opts.parser, ad, name_pending, istream_pos, outfd, retval ); + } diff --git a/delete_lz.cc b/delete_lz.cc new file mode 100644 index 0000000..2e536e3 --- /dev/null +++ b/delete_lz.cc @@ -0,0 +1,139 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cctype> +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <stdint.h> // for lzlib.h +#include <unistd.h> +#include <lzlib.h> + +#include "tarlz.h" +#include "arg_parser.h" +#include "lzip_index.h" +#include "archive_reader.h" + + +/* Deleting from a corrupt archive must not worsen the corruption. Stop and + tail-copy as soon as corruption is found. +*/ +int delete_members_lz( const Cl_options & cl_opts, + const Archive_descriptor & ad, + std::vector< char > & name_pending, + const int outfd ) + { + Archive_reader_i ar( ad ); // indexed reader + Resizable_buffer rbuf; + if( !rbuf.size() || ar.fatal() ) { show_error( mem_msg ); return 1; } + + long long istream_pos = 0; // source of next data move + int retval = 0, retval2 = 0; + for( long i = 0; i < ad.lzip_index.members(); ++i ) + { + if( ad.lzip_index.dblock( i ).size() == 0 ) continue; // empty lzip member + long long member_begin = 0; // first pos of current tar member + Extended extended; // metadata from extended records + bool prev_extended = false; // prev header was extended + ar.set_member( i ); // prepare for new member + while( true ) // process one tar header per iteration + { + if( ar.data_pos() >= ar.mdata_end() ) + { + if( ar.at_member_end() && !prev_extended ) break; + // member end exceeded or ends in extended + show_file_error( ad.namep, "Member misalignment found." 
); + retval = 2; goto done; + } + if( !prev_extended ) member_begin = ar.data_pos(); + Tar_header header; + if( ( retval = ar.read( header, header_size ) ) != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; } + if( !verify_ustar_chksum( header ) ) // error or EOA + { + if( block_is_zero( header, header_size ) ) // EOA + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg1 ); retval = 2; } + goto done; + } + // indexed archive reader does not verify posix format + show_file_error( ad.namep, ( ar.data_pos() > header_size ) ? + bad_hdr_msg : posix_lz_msg ); + retval = 2; + goto done; + } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_global ) + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg2 ); retval = 2; goto done; } + Extended dummy; // global headers are parsed and ignored + retval = ar.parse_records( dummy, header, rbuf, gblrec_msg, true ); + if( retval ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; } + continue; + } + if( typeflag == tf_extended ) + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg3 ); retval = 2; goto done; } + if( ( retval = ar.parse_records( extended, header, rbuf, extrec_msg, + cl_opts.permissive ) ) != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; } + if( !extended.crc_present() && cl_opts.missing_crc ) + { show_file_error( ad.namep, miscrc_msg ); retval = 2; goto done; } + prev_extended = true; continue; + } + prev_extended = false; + + extended.fill_from_ustar( header ); // copy metadata from header + + if( ( retval = ar.skip_member( extended ) ) != 0 ) goto done; + + // delete tar member + if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) + { + print_removed_prefix( extended.removed_prefix ); + // verify that members match + if( member_begin != ad.lzip_index.dblock( i ).pos() || !ar.at_member_end() 
) + { show_file_error( extended.path().c_str(), + "Can't delete: not compressed individually." ); + retval2 = 2; extended.reset(); continue; } + if( !show_member_name( extended, header, 1, rbuf ) ) + { retval = 1; goto done; } + const long long size = ad.lzip_index.mblock( i ).pos() - istream_pos; + if( size > 0 ) // move pending data each time a member is deleted + { + if( istream_pos == 0 ) + { if( !safe_seek( outfd, size ) ) { retval = 1; goto done; } } + else if( !safe_seek( ad.infd, istream_pos ) || + !copy_file( ad.infd, outfd, size ) ) { retval = 1; goto done; } + } + istream_pos = ad.lzip_index.mblock( i ).end(); // member end + } + extended.reset(); + } + } +done: + if( retval < retval2 ) retval = retval2; + // tail copy keeps trailing data + return tail_copy( cl_opts.parser, ad, name_pending, istream_pos, outfd, retval ); + } diff --git a/doc/tarlz.1 b/doc/tarlz.1 new file mode 100644 index 0000000..d23b164 --- /dev/null +++ b/doc/tarlz.1 @@ -0,0 +1,177 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH TARLZ "1" "September 2022" "tarlz 0.23" "User Commands" +.SH NAME +tarlz \- creates tar archives with multimember lzip compression +.SH SYNOPSIS +.B tarlz +\fI\,operation \/\fR[\fI\,options\/\fR] [\fI\,files\/\fR] +.SH DESCRIPTION +Tarlz is a massively parallel (multi\-threaded) combined implementation of +the tar archiver and the lzip compressor. Tarlz uses the compression library +lzlib. +.PP +Tarlz creates, lists, and extracts archives in a simplified and safer +variant of the POSIX pax format compressed in lzip format, keeping the +alignment between tar members and lzip members. The resulting multimember +tar.lz archive is fully backward compatible with standard tar tools like GNU +tar, which treat it like any other tar.lz archive. Tarlz can append files to +the end of such compressed archives. +.PP +Keeping the alignment between tar members and lzip members has two +advantages. 
It adds an indexed lzip layer on top of the tar archive, making +it possible to decode the archive safely in parallel. It also minimizes the +amount of data lost in case of corruption. +.PP +The tarlz file format is a safe POSIX\-style backup format. In case of +corruption, tarlz can extract all the undamaged members from the tar.lz +archive, skipping over the damaged members, just like the standard +(uncompressed) tar. Moreover, the option '\-\-keep\-damaged' can be used to +recover as much data as possible from each damaged member, and lziprecover +can be used to recover some of the damaged members. +.SS "Operations:" +.TP +\fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-A\fR, \fB\-\-concatenate\fR +append archives to the end of an archive +.TP +\fB\-c\fR, \fB\-\-create\fR +create a new archive +.TP +\fB\-d\fR, \fB\-\-diff\fR +find differences between archive and file system +.TP +\fB\-\-delete\fR +delete files/directories from an archive +.TP +\fB\-r\fR, \fB\-\-append\fR +append files to the end of an archive +.TP +\fB\-t\fR, \fB\-\-list\fR +list the contents of an archive +.TP +\fB\-x\fR, \fB\-\-extract\fR +extract files/directories from an archive +.TP +\fB\-z\fR, \fB\-\-compress\fR +compress existing POSIX tar archives +.TP +\fB\-\-check\-lib\fR +check version of lzlib and exit +.SH OPTIONS +.TP +\fB\-B\fR, \fB\-\-data\-size=\fR<bytes> +set target size of input data blocks [2x8=16 MiB] +.TP +\fB\-C\fR, \fB\-\-directory=\fR<dir> +change to directory <dir> +.TP +\fB\-f\fR, \fB\-\-file=\fR<archive> +use archive file <archive> +.TP +\fB\-h\fR, \fB\-\-dereference\fR +follow symlinks; archive the files they point to +.TP +\fB\-n\fR, \fB\-\-threads=\fR<n> +set number of (de)compression threads [2] +.TP +\fB\-o\fR, \fB\-\-output=\fR<file> +compress to <file> +.TP +\fB\-p\fR, \fB\-\-preserve\-permissions\fR +don't subtract the umask on extraction +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress 
all messages +.TP +\fB\-v\fR, \fB\-\-verbose\fR +verbosely list files processed +.TP +\fB\-0\fR .. \fB\-9\fR +set compression level [default 6] +.TP +\fB\-\-uncompressed\fR +don't compress the archive created +.TP +\fB\-\-asolid\fR +create solidly compressed appendable archive +.TP +\fB\-\-bsolid\fR +create per block compressed archive (default) +.TP +\fB\-\-dsolid\fR +create per directory compressed archive +.TP +\fB\-\-no\-solid\fR +create per file compressed archive +.TP +\fB\-\-solid\fR +create solidly compressed archive +.TP +\fB\-\-anonymous\fR +equivalent to '\-\-owner=root \fB\-\-group\fR=\fI\,root\/\fR' +.TP +\fB\-\-owner=\fR<owner> +use <owner> name/ID for files added to archive +.TP +\fB\-\-group=\fR<group> +use <group> name/ID for files added to archive +.TP +\fB\-\-exclude=\fR<pattern> +exclude files matching a shell pattern +.TP +\fB\-\-ignore\-ids\fR +ignore differences in owner and group IDs +.TP +\fB\-\-ignore\-overflow\fR +ignore mtime overflow differences on 32\-bit +.TP +\fB\-\-keep\-damaged\fR +don't delete partially extracted files +.TP +\fB\-\-missing\-crc\fR +exit with error status if missing extended CRC +.TP +\fB\-\-mtime=\fR<date> +use <date> as mtime for files added to archive +.TP +\fB\-\-out\-slots=\fR<n> +number of 1 MiB output packets buffered [64] +.TP +\fB\-\-warn\-newer\fR +warn if any file is newer than the archive +.PP +If no archive is specified, tarlz tries to read it from standard input or +write it to standard output. +.PP +Exit status: 0 for a normal exit, 1 for environmental problems +(file not found, files differ, invalid command line options, I/O errors, +etc), 2 to indicate a corrupt or invalid input file, 3 for an internal +consistency error (e.g., bug) which caused tarlz to panic. +.SH "REPORTING BUGS" +Report bugs to lzip\-bug@nongnu.org +.br +Tarlz home page: http://www.nongnu.org/lzip/tarlz.html +.SH COPYRIGHT +Copyright \(co 2022 Antonio Diaz Diaz. 
+Using lzlib 1.13 +License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html> +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B tarlz +is maintained as a Texinfo manual. If the +.B info +and +.B tarlz +programs are properly installed at your site, the command +.IP +.B info tarlz +.PP +should give you access to the complete manual. diff --git a/doc/tarlz.info b/doc/tarlz.info new file mode 100644 index 0000000..d71c0a3 --- /dev/null +++ b/doc/tarlz.info @@ -0,0 +1,1272 @@ +This is tarlz.info, produced by makeinfo version 4.13+ from tarlz.texi. + +INFO-DIR-SECTION Archiving +START-INFO-DIR-ENTRY +* Tarlz: (tarlz). Archiver with multimember lzip compression +END-INFO-DIR-ENTRY + + +File: tarlz.info, Node: Top, Next: Introduction, Up: (dir) + +Tarlz Manual +************ + +This manual is for Tarlz (version 0.23, 23 September 2022). + +* Menu: + +* Introduction:: Purpose and features of tarlz +* Invoking tarlz:: Command line interface +* Portable character set:: POSIX portable filename character set +* File format:: Detailed format of the compressed archive +* Amendments to pax format:: The reasons for the differences with pax +* Program design:: Internal structure of tarlz +* Multi-threaded decoding:: Limitations of parallel tar decoding +* Minimum archive sizes:: Sizes required for full multi-threaded speed +* Examples:: A small tutorial with examples +* Problems:: Reporting bugs +* Concept index:: Index of concepts + + + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This manual is free documentation: you have unlimited permission to copy, +distribute, and modify it. + + +File: tarlz.info, Node: Introduction, Next: Invoking tarlz, Prev: Top, Up: Top + +1 Introduction +************** + +Tarlz is a massively parallel (multi-threaded) combined implementation of +the tar archiver and the lzip compressor. 
Tarlz uses the compression +library lzlib. + + Tarlz creates tar archives using a simplified and safer variant of the +POSIX pax format compressed in lzip format, keeping the alignment between +tar members and lzip members. The resulting multimember tar.lz archive is +fully backward compatible with standard tar tools like GNU tar, which treat +it like any other tar.lz archive. Tarlz can append files to the end of such +compressed archives. + + Keeping the alignment between tar members and lzip members has two +advantages. It adds an indexed lzip layer on top of the tar archive, making +it possible to decode the archive safely in parallel. It also minimizes the +amount of data lost in case of corruption. Compressing a tar archive with +plzip may even double the number of files lost for each lzip member damaged +because it does not keep the members aligned. + + Tarlz can create tar archives with five levels of compression +granularity: per file (--no-solid), per block (--bsolid, default), per +directory (--dsolid), appendable solid (--asolid), and solid (--solid). It +can also create uncompressed tar archives. + +Of course, compressing each file (or each directory) individually can't +achieve a compression ratio as high as compressing solidly the whole tar +archive, but it has the following advantages: + + * The resulting multimember tar.lz archive can be decompressed in + parallel, multiplying the decompression speed. + + * New members can be appended to the archive (by removing the + end-of-archive member), and unwanted members can be deleted from the + archive. Just like an uncompressed tar archive. + + * It is a safe POSIX-style backup format. In case of corruption, tarlz + can extract all the undamaged members from the tar.lz archive, + skipping over the damaged members, just like the standard + (uncompressed) tar.
Moreover, the option '--keep-damaged' can be used + to recover as much data as possible from each damaged member, and + lziprecover can be used to recover some of the damaged members. + + * A multimember tar.lz archive is usually smaller than the corresponding + solidly compressed tar.gz archive, except when individually + compressing files smaller than about 32 KiB. + + Tarlz protects the extended records with a Cyclic Redundancy Check (CRC) +in a way compatible with standard tar tools. *Note crc32::. + + Tarlz does not understand other tar formats like 'gnu', 'oldgnu', 'star' +or 'v7'. The command 'tarlz -tf archive.tar.lz > /dev/null' can be used to +verify that the format of the archive is compatible with tarlz. + + +File: tarlz.info, Node: Invoking tarlz, Next: Portable character set, Prev: Introduction, Up: Top + +2 Invoking tarlz +**************** + +The format for running tarlz is: + + tarlz OPERATION [OPTIONS] [FILES] + +All operations except '--concatenate' and '--compress' operate on whole +trees if any FILE is a directory. All operations except '--compress' +overwrite output files without warning. If no archive is specified, tarlz +tries to read it from standard input or write it to standard output. Tarlz +refuses to read archive data from a terminal or write archive data to a +terminal. Tarlz detects when the archive being created or enlarged is among +the files to be archived, appended, or concatenated, and skips it. + + Tarlz does not use absolute file names nor file names above the current +working directory (perhaps changed by option '-C'). On archive creation or +appending tarlz archives the files specified, but removes from member names +any leading and trailing slashes and any file name prefixes containing a +'..' component. On extraction, leading and trailing slashes are also +removed from member names, and archive members containing a '..' component +in the file name are skipped. 
Tarlz does not follow symbolic links during +extraction; not even symbolic links replacing intermediate directories. + + On extraction and listing, tarlz removes leading './' strings from +member names in the archive or given in the command line, so that +'tarlz -xf foo ./bar baz' extracts members 'bar' and './baz' from archive +'foo'. + + If several compression levels or '--*solid' options are given, the last +setting is used. For example '-9 --solid --uncompressed -1' is equivalent +to '-1 --solid'. + + tarlz supports the following operations: + +'--help' + Print an informative help message describing the options and exit. + +'-V' +'--version' + Print the version number of tarlz on the standard output and exit. + This version number should be included in all bug reports. + +'-A' +'--concatenate' + Append one or more archives to the end of an archive. If no archive is + specified with the option '-f', the input archives are concatenated to + standard output. All the archives involved must be regular (seekable) + files, and must be either all compressed or all uncompressed. + Compressed and uncompressed archives can't be mixed. Compressed + archives must be multimember lzip files with the two end-of-archive + blocks plus any zero padding contained in the last lzip member of each + archive. The intermediate end-of-archive blocks are removed as each + new archive is concatenated. If the archive is uncompressed, tarlz + parses tar headers until it finds the end-of-archive blocks. Exit with + status 0 without modifying the archive if no FILES have been specified. + + Concatenating archives containing files in common results in two or + more tar members with the same name in the resulting archive, which + may produce nondeterministic behavior during multi-threaded extraction. + *Note mt-extraction::. + +'-c' +'--create' + Create a new archive from FILES. + +'-d' +'--diff' + Compare and report differences between archive and file system. 
For + each tar member in the archive, verify that the corresponding file in + the file system exists and is of the same type (regular file, + directory, etc). Report on standard output the differences found in + type, mode (permissions), owner and group IDs, modification time, file + size, file contents (of regular files), target (of symlinks) and + device number (of block/character special files). + + As tarlz removes leading slashes from member names, the option '-C' may + be used in combination with '--diff' when absolute file names were used + on archive creation: 'tarlz -C / -d'. Alternatively, tarlz may be run + from the root directory to perform the comparison. + +'--delete' + Delete files and directories from an archive in place. It currently can + delete only from uncompressed archives and from archives with files + compressed individually ('--no-solid' archives). Note that files of + about '--data-size' or larger are compressed individually even if + '--bsolid' is used, and can therefore be deleted. Tarlz takes care to + not delete a tar member unless it is possible to do so. For example it + won't try to delete a tar member that is not compressed individually. + Even in the case of finding a corrupt member after having deleted some + member(s), tarlz stops and copies the rest of the file as soon as + corruption is found, leaving it just as corrupt as it was, but not + worse. + + To delete a directory without deleting the files under it, use + 'tarlz --delete -f foo --exclude='dir/*' dir'. Deleting in place may + be dangerous. A corrupt archive, a power cut, or an I/O error may cause + data loss. + +'-r' +'--append' + Append files to the end of an archive. The archive must be a regular + (seekable) file either compressed or uncompressed. Compressed members + can't be appended to an uncompressed archive, nor vice versa. 
If the + archive is compressed, it must be a multimember lzip file with the two + end-of-archive blocks plus any zero padding contained in the last lzip + member of the archive. It is possible to append files to an archive + with a different compression granularity. Appending works as follows; + first the end-of-archive blocks are removed, then the new members are + appended, and finally two new end-of-archive blocks are appended to + the archive. If the archive is uncompressed, tarlz parses and skips + tar headers until it finds the end-of-archive blocks. Exit with status + 0 without modifying the archive if no FILES have been specified. + + Appending files already present in the archive results in two or more + tar members with the same name, which may produce nondeterministic + behavior during multi-threaded extraction. *Note mt-extraction::. + +'-t' +'--list' + List the contents of an archive. If FILES are given, list only the + FILES given. + +'-x' +'--extract' + Extract files from an archive. If FILES are given, extract only the + FILES given. Else extract all the files in the archive. To extract a + directory without extracting the files under it, use + 'tarlz -xf foo --exclude='dir/*' dir'. Tarlz removes files and empty + directories unconditionally before extracting over them. Other than + that, it will not make any special effort to extract a file over an + incompatible type of file. For example, extracting a file over a + non-empty directory will usually fail. + +'-z' +'--compress' + Compress existing POSIX tar archives aligning the lzip members to the + tar members with choice of granularity (--bsolid by default, --dsolid + works like --asolid). The input archives are kept unchanged. Existing + compressed archives are not overwritten. A hyphen '-' used as the name + of an input archive reads from standard input and writes to standard + output (unless the option '--output' is used). 
Tarlz can be used as + compressor for GNU tar using a command like + 'tar -c -Hustar foo | tarlz -z -o foo.tar.lz'. Note that tarlz only + works reliably on archives without global headers, or with global + headers whose content can be ignored. + + The compression is reversible, including any garbage present after the + end-of-archive blocks. Tarlz stops parsing after the first + end-of-archive block is found, and then compresses the rest of the + archive. Unless solid compression is requested, the end-of-archive + blocks are compressed in a lzip member separated from the preceding + members and from any non-zero garbage following the end-of-archive + blocks. '--compress' implies plzip argument style, not tar style. Each + input archive is compressed to a file with the extension '.lz' added + unless the option '--output' is used. When '--output' is used, only + one input archive can be specified. '-f' can't be used with + '--compress'. + +'--check-lib' + Compare the version of lzlib used to compile tarlz with the version + actually being used at run time and exit. Report any differences + found. Exit with error status 1 if differences are found. A mismatch + may indicate that lzlib is not correctly installed or that a different + version of lzlib has been installed after compiling tarlz. Exit with + error status 2 if LZ_API_VERSION and LZ_version_string don't match. + 'tarlz -v --check-lib' shows the version of lzlib being used and the + value of LZ_API_VERSION (if defined). *Note Library version: + (lzlib)Library version. + + + tarlz supports the following options: *Note Argument syntax: +(arg_parser)Argument syntax. + +'-B BYTES' +'--data-size=BYTES' + Set target size of input data blocks for the option '--bsolid'. *Note + --bsolid::. Valid values range from 8 KiB to 1 GiB. Default value is + two times the dictionary size, except for option '-0' where it + defaults to 1 MiB. *Note Minimum archive sizes::. + +'-C DIR' +'--directory=DIR' + Change to directory DIR. 
When creating or appending, the position of + each '-C' option in the command line is significant; it will change the + current working directory for the following FILES until a new '-C' + option appears in the command line. When extracting or comparing, all + the '-C' options are executed in sequence before reading the archive. + Listing ignores any '-C' options specified. DIR is relative to the + then current working directory, perhaps changed by a previous '-C' + option. + + Note that a process can only have one current working directory (CWD). + Therefore multi-threading can't be used to create an archive if a '-C' + option appears after a relative file name in the command line. + +'-f ARCHIVE' +'--file=ARCHIVE' + Use archive file ARCHIVE. A hyphen '-' used as an ARCHIVE argument + reads from standard input or writes to standard output. + +'-h' +'--dereference' + Follow symbolic links during archive creation, appending or comparison. + Archive or compare the files they point to instead of the links + themselves. + +'-n N' +'--threads=N' + Set the number of (de)compression threads, overriding the system's + default. Valid values range from 0 to "as many as your system can + support". A value of 0 disables threads entirely. If this option is + not used, tarlz tries to detect the number of processors in the system + and use it as default value. 'tarlz --help' shows the system's default + value. See the note about multi-threaded archive creation in the + option '-C' above. + + Note that the number of usable threads is limited during compression to + ceil( uncompressed_size / data_size ) (*note Minimum archive sizes::), + and during decompression to the number of lzip members in the tar.lz + archive, which you can find by running 'lzip -lv archive.tar.lz'. + +'-o FILE' +'--output=FILE' + Write the compressed output to FILE. '-o -' writes the compressed + output to standard output. Currently '--output' only works with + '--compress'. 
+ +'-p' +'--preserve-permissions' + On extraction, set file permissions as they appear in the archive. + This is the default behavior when tarlz is run by the superuser. The + default for other users is to subtract the umask of the user running + tarlz from the permissions specified in the archive. + +'-q' +'--quiet' + Quiet operation. Suppress all messages. + +'-v' +'--verbose' + Verbosely list files processed. Further -v's (up to 4) increase the + verbosity level. + +'-0 .. -9' + Set the compression level for '--create', '--append', and + '--compress'. The default compression level is '-6'. Like lzip, tarlz + also minimizes the dictionary size of the lzip members it creates, + reducing the amount of memory required for decompression. + + Level Dictionary size Match length limit + -0 64 KiB 16 bytes + -1 1 MiB 5 bytes + -2 1.5 MiB 6 bytes + -3 2 MiB 8 bytes + -4 3 MiB 12 bytes + -5 4 MiB 20 bytes + -6 8 MiB 36 bytes + -7 16 MiB 68 bytes + -8 24 MiB 132 bytes + -9 32 MiB 273 bytes + +'--uncompressed' + With '--create', don't compress the tar archive created. Create an + uncompressed tar archive instead. With '--append', don't compress the + new members appended to the tar archive. Compressed members can't be + appended to an uncompressed archive, nor vice versa. + +'--asolid' + When creating or appending to a compressed archive, use appendable + solid compression. All the files being added to the archive are + compressed into a single lzip member, but the end-of-archive blocks + are compressed into a separate lzip member. This creates a solidly + compressed appendable archive. Solid archives can't be created nor + decoded in parallel. + +'--bsolid' + When creating or appending to a compressed archive, use block + compression. Tar members are compressed together in a lzip member + until they approximate a target uncompressed size. The size can't be + exact because each solidly compressed data block must contain an + integer number of tar members. 
Block compression is the default + because it improves compression ratio for archives with many files + smaller than the block size. This option allows tarlz to revert to + default behavior if, for example, it is invoked through an alias like + 'tar='tarlz --solid''. *Note --data-size::, to set the target block + size. + +'--dsolid' + When creating or appending to a compressed archive, compress each file + specified in the command line separately in its own lzip member, and + use solid compression for each directory specified in the command + line. The end-of-archive blocks are compressed into a separate lzip + member. This creates a compressed appendable archive with a separate + lzip member for each file or top-level directory specified. + +'--no-solid' + When creating or appending to a compressed archive, compress each file + separately in its own lzip member. The end-of-archive blocks are + compressed into a separate lzip member. This creates a compressed + appendable archive with a lzip member for each file. + +'--solid' + When creating or appending to a compressed archive, use solid + compression. The files being added to the archive, along with the + end-of-archive blocks, are compressed into a single lzip member. The + resulting archive is not appendable. No more files can be later + appended to the archive. Solid archives can't be created nor decoded + in parallel. + +'--anonymous' + Equivalent to '--owner=root --group=root'. + +'--owner=OWNER' + When creating or appending, use OWNER for files added to the archive. + If OWNER is not a valid user name, it is decoded as a decimal numeric + user ID. + +'--group=GROUP' + When creating or appending, use GROUP for files added to the archive. + If GROUP is not a valid group name, it is decoded as a decimal numeric + group ID. + +'--exclude=PATTERN' + Exclude files matching a shell pattern like '*.o'. A file is considered + to match if any component of the file name matches. 
For example, '*.o' + matches 'foo.o', 'foo.o/bar' and 'foo/bar.o'. If PATTERN contains a + '/', it matches a corresponding '/' in the file name. For example, + 'foo/*.o' matches 'foo/bar.o'. Multiple '--exclude' options can be + specified. + +'--ignore-ids' + Make '--diff' ignore differences in owner and group IDs. This option is + useful when comparing an '--anonymous' archive. + +'--ignore-overflow' + Make '--diff' ignore differences in mtime caused by overflow on 32-bit + systems with a 32-bit time_t. + +'--keep-damaged' + Don't delete partially extracted files. If a decompression error + happens while extracting a file, keep the partial data extracted. Use + this option to recover as much data as possible from each damaged + member. It is recommended to run tarlz in single-threaded mode + (--threads=0) when using this option. + +'--missing-crc' + Exit with error status 2 if the CRC of the extended records is + missing. When this option is used, tarlz detects any corruption in the + extended records (only limited by CRC collisions). But note that a + corrupt 'GNU.crc32' keyword, for example 'GNU.crc33', is reported as a + missing CRC instead of as a corrupt record. This misleading + 'Missing CRC' message is the consequence of a flaw in the POSIX pax + format; i.e., the lack of a mandatory check sequence of the extended + records. *Note crc32::. + +'--mtime=DATE' + When creating or appending, use DATE as the modification time for + files added to the archive instead of their actual modification times. + The value of DATE may be either '@' followed by the number of seconds + since (or before) the epoch, or a date in format + '[-]YYYY-MM-DD HH:MM:SS' or '[-]YYYY-MM-DDTHH:MM:SS', or the name of + an existing reference file starting with '.' or '/' whose modification + time is used. The time of day 'HH:MM:SS' in the date format is + optional and defaults to '00:00:00'. The epoch is + '1970-01-01 00:00:00 UTC'. 
Negative seconds or years define a + modification time before the epoch. + +'--out-slots=N' + Number of 1 MiB output packets buffered per worker thread during + multi-threaded creation or appending to compressed archives. + Increasing the number of packets may increase compression speed if the + files being archived are larger than 64 MiB compressed, but requires + more memory. Valid values range from 1 to 1024. The default value is + 64. + +'--warn-newer' + During archive creation, warn if any file being archived has a + modification time newer than the archive creation time. This option + may slow archive creation somewhat because it makes an extra call to + 'stat' after archiving each file, but it guarantees that file contents + were not modified during the creation of the archive. Note that the + file must be at least one second newer than the archive for it to be + detected as newer. + + + Exit status: 0 for a normal exit, 1 for environmental problems (file not +found, files differ, invalid command line options, I/O errors, etc), 2 to +indicate a corrupt or invalid input file, 3 for an internal consistency +error (e.g., bug) which caused tarlz to panic. + + +File: tarlz.info, Node: Portable character set, Next: File format, Prev: Invoking tarlz, Up: Top + +3 POSIX portable filename character set +*************************************** + +The set of characters from which portable file names are constructed. + + A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z + 0 1 2 3 4 5 6 7 8 9 . _ - + + The last three characters are the period, underscore, and hyphen-minus +characters, respectively. + + File names are identifiers. Therefore, archiving works better when file +names use only the portable character set without spaces added. 
+ + +File: tarlz.info, Node: File format, Next: Amendments to pax format, Prev: Portable character set, Up: Top + +4 File format +************* + +In the diagram below, a box like this: + ++---+ +| | <-- the vertical bars might be missing ++---+ + + represents one byte; a box like this: + ++==============+ +| | ++==============+ + + represents a variable number of bytes or a fixed but large number of +bytes (for example 512). + + + A tar.lz file consists of a series of lzip members (compressed data +sets). The members simply appear one after another in the file, with no +additional information before, between, or after them. + + Each lzip member contains one or more tar members in a simplified POSIX +pax interchange format. The only pax typeflag value supported by tarlz (in +addition to the typeflag values defined by the ustar format) is 'x'. The +pax format is an extension on top of the ustar format that removes the size +limitations of the ustar format. + + Each tar member contains one file archived, and is represented by the +following sequence: + + * An optional extended header block followed by one or more blocks that + contain the extended header records as if they were the contents of a + file; i.e., the extended header records are included as the data for + this header block. This header block is of the form described in pax + header block, with a typeflag value of 'x'. + + * A header block in ustar format that describes the file. Any fields + defined in the preceding optional extended header records override the + associated fields in this header block for this file. + + * Zero or more blocks that contain the contents of the file. + + Each tar member must be contiguously stored in a lzip member for the +parallel decoding operations like '--list' to work. If any tar member is +split over two or more lzip members, the archive must be decoded +sequentially. *Note Multi-threaded decoding::. 
+ + At the end of the archive file there are two 512-byte blocks filled with +binary zeros, interpreted as an end-of-archive indicator. These EOA blocks +are either compressed in a separate lzip member or compressed along with the +tar members contained in the last lzip member. For a compressed archive to +be recognized by tarlz as appendable, the last lzip member must contain +between 512 and 32256 zeros alone. + + The diagram below shows the correspondence between each tar member +(formed by one or two headers plus optional data) in the tar archive and +each lzip member in the resulting multimember tar.lz archive, when per file +compression is used: *Note File format: (lzip)File format. + +tar ++========+======+=================+===============+========+======+========+ +| header | data | extended header | extended data | header | data | EOA | ++========+======+=================+===============+========+======+========+ + +tar.lz ++===============+=================================================+========+ +| member | member | member | ++===============+=================================================+========+ + + +4.1 Pax header block +==================== + +The pax header block is identical to the ustar header block described below +except that the typeflag has the value 'x' (extended). The field 'size' is +the size of the extended header data in bytes. Most other fields in the pax +header block are zeroed on archive creation to prevent trouble if the +archive is read by an ustar tool, and are ignored by tarlz on archive +extraction. *Note flawed-compat::. + + The pax extended header data consists of one or more records, each of +them constructed as follows: +'"%d %s=%s\n", <length>, <keyword>, <value>' + + The fields <length> and <keyword> in the record must be limited to the +portable character set (*note Portable character set::). The field <length> +contains the decimal length of the record in bytes, including the trailing +newline. 
The field <value> is stored as-is, without conversion to UTF-8 nor +any other transformation. The fields are separated by the ASCII characters +space, equal-sign, and newline. + + These are the <keyword> values currently supported by tarlz: + +'atime' + The signed decimal representation of the access time of the following + file in seconds since (or before) the epoch, obtained from the function + 'stat'. The atime record is created only for files with a modification + time outside of the ustar range. *Note ustar-mtime::. + +'gid' + The unsigned decimal representation of the group ID of the group that + owns the following file. The gid record is created only for files with + a group ID greater than 2_097_151 (octal 7777777). *Note + ustar-uid-gid::. + +'linkpath' + The file name of a link being created to another file, of any type, + previously archived. This record overrides the field 'linkname' in the + following ustar header block. The following ustar header block + determines the type of link created. If typeflag of the following + header block is 1, it will be a hard link. If typeflag is 2, it will + be a symbolic link and the linkpath value will be used as the contents + of the symbolic link. The linkpath record is created only for links + with a link name that does not fit in the space provided by the ustar + header. + +'mtime' + The signed decimal representation of the modification time of the + following file in seconds since (or before) the epoch, obtained from + the function 'stat'. This record overrides the field 'mtime' in the + following ustar header block. The mtime record is created only for + files with a modification time outside of the ustar range. *Note + ustar-mtime::. + +'path' + The file name of the following file. This record overrides the fields + 'name' and 'prefix' in the following ustar header block. 
The path + record is created for files with a name that does not fit in the space + provided by the ustar header, but is also created for files that + require any other extended record so that the fields 'name' and + 'prefix' in the following ustar header block can be zeroed. + +'size' + The size of the file in bytes, expressed as a decimal number using + digits from the ISO/IEC 646:1991 (ASCII) standard. This record + overrides the field 'size' in the following ustar header block. The + size record is created only for files with a size value greater than + 8_589_934_591 (octal 77777777777); that is, 8 GiB (2^33 bytes) or + larger. + +'uid' + The unsigned decimal representation of the user ID of the file owner + of the following file. The uid record is created only for files with a + user ID greater than 2_097_151 (octal 7777777). *Note ustar-uid-gid::. + +'GNU.crc32' + CRC32-C (Castagnoli) of the extended header data excluding the 8 bytes + representing the CRC <value> itself. The <value> is represented as 8 + hexadecimal digits in big endian order, '22 GNU.crc32=00000000\n'. The + keyword of the CRC record is protected by the CRC to guarantee that + corruption is always detected when using '--missing-crc' (except in + case of CRC collision). A CRC was chosen because a checksum is too + weak for a potentially large list of variable sized records. A + checksum can't detect simple errors like the swapping of two bytes. + + + At verbosity level 1 or higher tarlz prints a diagnostic for each unknown +extended header keyword found in an archive, once per keyword. + + +4.2 Ustar header block +====================== + +The ustar header block has a length of 512 bytes and is structured as shown +in the following table. All lengths and offsets are in decimal. 
+ +Field Name Offset Length (in bytes) +name 0 100 +mode 100 8 +uid 108 8 +gid 116 8 +size 124 12 +mtime 136 12 +chksum 148 8 +typeflag 156 1 +linkname 157 100 +magic 257 6 +version 263 2 +uname 265 32 +gname 297 32 +devmajor 329 8 +devminor 337 8 +prefix 345 155 + + All characters in the header block are coded using the ISO/IEC 646:1991 +(ASCII) standard, except in fields storing names for files, users, and +groups. For maximum portability between implementations, names should only +contain characters from the portable character set (*note Portable +character set::), but if an implementation supports the use of characters +outside of '/' and the portable character set in names for files, users, +and groups, tarlz will use the byte values in these names unmodified. + + The fields 'name', 'linkname', and 'prefix' are null-terminated +character strings except when all characters in the array contain non-null +characters including the last character. + + The fields 'name' and 'prefix' produce the file name. A new file name is +formed, if prefix is not an empty string (its first character is not null), +by concatenating prefix (up to the first null character), a slash +character, and name; otherwise, name is used alone. In either case, name is +terminated at the first null character. If prefix begins with a null +character, it is ignored. In this manner, file names of at most 256 +characters can be supported. If a file name does not fit in the space +provided, an extended record is used to store the file name. + + The field 'linkname' does not use the prefix to produce a file name. If +the link name does not fit in the 100 characters provided, an extended +record is used to store the link name. + + The field 'mode' provides 12 access permission bits. 
The following table +shows the symbolic name of each bit and its octal value: + +Bit Name Value Bit Name Value Bit Name Value +--------------------------------------------------- +S_ISUID 04000 S_ISGID 02000 S_ISVTX 01000 +S_IRUSR 00400 S_IWUSR 00200 S_IXUSR 00100 +S_IRGRP 00040 S_IWGRP 00020 S_IXGRP 00010 +S_IROTH 00004 S_IWOTH 00002 S_IXOTH 00001 + + The fields 'uid' and 'gid' are the user and group IDs of the owner and +group of the file, respectively. If the file uid or gid are greater than +2_097_151 (octal 7777777), an extended record is used to store the uid or +gid. + + The field 'size' contains the octal representation of the size of the +file in bytes. If the field 'typeflag' specifies a file of type '0' +(regular file) or '7' (high performance regular file), the number of logical +records following the header is (size / 512) rounded to the next integer. +For all other values of typeflag, tarlz either sets the size field to 0 or +ignores it, and does not store or expect any logical records following the +header. If the file size is larger than 8_589_934_591 bytes +(octal 77777777777), an extended record is used to store the file size. + + The field 'mtime' contains the octal representation of the modification +time of the file at the time it was archived, obtained from the function +'stat'. If the modification time is negative or larger than 8_589_934_591 +(octal 77777777777) seconds since the epoch, an extended record is used to +store the modification time. The ustar range of mtime goes from +'1970-01-01 00:00:00 UTC' to '2242-03-16 12:56:31 UTC'. + + The field 'chksum' contains the octal representation of the value of the +simple sum of all bytes in the header logical record. Each byte in the +header is treated as an unsigned value. When calculating the checksum, the +chksum field is treated as if it were all space characters. + + The field 'typeflag' contains a single character specifying the type of +file archived: + +''0'' + Regular file. 
+ +''1'' + Hard link to another file, of any type, previously archived. Hard + links must not contain file data. + +''2'' + Symbolic link. + +''3', '4'' + Character special file and block special file respectively. In this + case the fields 'devmajor' and 'devminor' contain information defining + the device in unspecified format. + +''5'' + Directory. + +''6'' + FIFO special file. + +''7'' + Reserved to represent a file to which an implementation has associated + some high-performance attribute (contiguous file). Tarlz treats this + type of file as a regular file (type 0). + + + The field 'magic' contains the ASCII null-terminated string "ustar". The +field 'version' contains the characters "00" (0x30,0x30). The fields +'uname' and 'gname' are null-terminated character strings except when all +characters in the array contain non-null characters including the last +character. Each numeric field contains a leading space- or zero-filled, +optionally null-terminated octal number using digits from the ISO/IEC +646:1991 (ASCII) standard. Tarlz is able to decode numeric fields 1 byte +longer than standard ustar by not requiring a terminating null character. + + +File: tarlz.info, Node: Amendments to pax format, Next: Program design, Prev: File format, Up: Top + +5 The reasons for the differences with pax +****************************************** + +Tarlz creates safe archives that allow the reliable detection of invalid or +corrupt metadata during decoding even when the integrity checking of lzip +can't be used because the lzip members are only decompressed partially, as +it happens in parallel '--diff', '--list', and '--extract'. In order to +achieve this goal and avoid some other flaws in the pax format, tarlz makes +some changes to the variant of the pax format that it uses. This chapter +describes these changes and the concrete reasons to implement them. 
+ + +5.1 Add a CRC of the extended records +===================================== + +The POSIX pax format has a serious flaw. The metadata stored in pax extended +records are not protected by any kind of check sequence. Corruption in a +long file name may cause the extraction of the file in the wrong place +without warning. Corruption in a large file size may cause the truncation of +the file or the appending of garbage to the file, both followed by a +spurious warning about a corrupt header far from the place of the undetected +corruption. + + Metadata like file name and file size must be always protected in an +archive format because of the adverse effects of undetected corruption in +them, potentially much worse than undetected corruption in the data. Even +more so in the case of pax because the amount of metadata it stores is +potentially large, making undetected corruption and archiver misbehavior +more probable. + + Headers and metadata must be protected separately from data because the +integrity checking of lzip may not be able to detect the corruption before +the metadata has been used, for example, to create a new file in the wrong +place. + + Because of the above, tarlz protects the extended records with a Cyclic +Redundancy Check (CRC) in a way compatible with standard tar tools. *Note +key_crc32::. + + +5.2 Remove flawed backward compatibility +======================================== + +In order to allow the extraction of pax archives by a tar utility conforming +to the POSIX-2:1993 standard, POSIX.1-2008 recommends selecting extended +header field values that allow such tar to create a regular file containing +the extended header records as data. This approach is broken because if the +extended header is needed because of a long file name, the fields 'name' +and 'prefix' will be unable to contain the full file name. (Some tar +implementations store the truncated name in the field 'name' alone, +truncating the name to only 100 bytes instead of 256). 
Therefore the files +corresponding to both the extended header and the overridden ustar header +will be extracted using truncated file names, perhaps overwriting existing +files or directories. It may be a security risk to extract a file with a +truncated file name. + + To avoid this problem, tarlz writes extended headers with all fields +zeroed except 'size' (which contains the size of the extended records), +'chksum', 'typeflag', 'magic', and 'version'. In particular, tarlz sets the +fields 'name' and 'prefix' to zero. This prevents old tar programs from +extracting the extended records as a file in the wrong place. Tarlz also +sets to zero those fields of the ustar header overridden by extended +records. Finally, tarlz skips members with zeroed 'name' and 'prefix' when +decoding, except when listing. This is needed to detect certain format +violations during parallel extraction. + + If an extended header is required for any reason (for example a file +size of 8 GiB or larger, or a link name longer than 100 bytes), tarlz also +moves the file name to the extended records to prevent an ustar tool from +trying to extract the file or link. This also makes easier during parallel +decoding the detection of a tar member split between two lzip members at +the boundary between the extended header and the ustar header. + + +5.3 As simple as possible (but not simpler) +=========================================== + +The tarlz format is mainly ustar. Extended pax headers are used only when +needed because the length of a file name or link name, or the size or other +attribute of a file exceed the limits of the ustar format. Adding 1 KiB of +extended header and records to each member just to save subsecond +timestamps seems wasteful for a backup format. Moreover, minimizing the +overhead may help recovering the archive with lziprecover in case of +corruption. + + Global pax headers are tolerated, but not supported; they are parsed and +ignored. 
Some operations may not behave as expected if the archive contains +global headers. + + +5.4 Improve reproducibility +=========================== + +Pax includes by default the process ID of the pax process in the ustar name +of the extended headers, making the archive not reproducible. Tarlz stores +the true name of the file just once, either in the ustar header or in the +extended records, making it easier to produce reproducible archives. + + Pax allows an extended record to have length x-1 or x if x is a power of +ten; '99<97_bytes>' or '100<97_bytes>'. Tarlz minimizes the length of the +record and always produces a length of x-1 in these cases. + + +5.5 No data in hard links +========================= + +Tarlz does not allow data in hard link members. The data (if any) must be in +the member determining the type of the file (which can't be a link). If all +the names of a file are stored as hard links, the type of the file is lost. +Not allowing data in hard links also prevents invalid actions like +extracting file data for a hard link to a symbolic link or to a directory. + + +5.6 Avoid misconversions to/from UTF-8 +====================================== + +There is no portable way to tell what charset a text string is coded into. +Therefore, tarlz stores all fields representing text strings unmodified, +without conversion to UTF-8 nor any other transformation. This prevents +accidental double UTF-8 conversions. If the need arises this behavior will +be adjusted with a command line option in the future. + + +File: tarlz.info, Node: Program design, Next: Multi-threaded decoding, Prev: Amendments to pax format, Up: Top + +6 Internal structure of tarlz +***************************** + +The parts of tarlz related to sequential processing of the archive are more +or less similar to any other tar and won't be described here. The +interesting parts described here are those related to Multi-threaded +processing. 
+ + The structure of the part of tarlz performing Multi-threaded archive +creation is somewhat similar to that of plzip with the added complication +of the solidity levels. *Note Program design: (plzip)Program design. A +grouper thread and several worker threads are created, with the main +thread acting as muxer (multiplexer) thread. A "packet courier" takes care of data +transfers among threads and limits the maximum number of data blocks +(packets) being processed simultaneously. + + The grouper traverses the directory tree, groups together the metadata of +the files to be archived in each lzip member, and distributes them to the +workers. The workers compress the metadata received from the grouper along +with the file data read from the file system. The muxer collects processed +packets from the workers, and writes them to the archive. + +.--------. +| data|---> to each worker below +| | .------------. +| file | ,-->| worker 0 |--, +| system | | `------------' | +| | .---------. | .------------. | .-------. .---------. +|metadata|--->| grouper |-+-->| worker 1 |--+-->| muxer |-->| archive | +`--------' `---------' | `------------' | `-------' `---------' + | ... | + | .------------. | + `-->| worker N-1 |--' + `------------' + + Decoding an archive is somewhat similar to how plzip decompresses a +regular file to standard output, with the differences that it is not the +data but only messages that are written to stdout/stderr, and that each +worker may access files in the file system either to read them (diff) or +write them (extract). As in plzip, each worker reads members directly from +the archive. + +.--------. +| file |<---> data to/from each worker below +| system | +`--------' .------------. + ,-->| worker 0 |--, + | `------------' | +.---------. | .------------. | .-------. .--------. +| archive |-+-->| worker 1 |--+-->| muxer |-->| stdout | +`---------' | `------------' | `-------' | stderr | + | ... | `--------' + | .------------. 
| + `-->| worker N-1 |--' + `------------' + + As misaligned tar.lz archives can't be decoded in parallel, and the +misalignment can't be detected until after decoding has started, a +"mastership request" mechanism has been designed that allows the decoding to +continue instead of signalling an error. + + During parallel decoding, if a worker finds a misalignment, it requests +mastership to decode the rest of the archive. When mastership is requested, +an error_member_id is set, and all subsequently received packets with +member_id > error_member_id are rejected. All workers requesting mastership +are blocked at the request_mastership call until mastership is granted. +Mastership is granted to the delivering worker when its queue is empty to +make sure that all preceding packets have been processed. When mastership is +granted, all packets are deleted and all subsequently received packets not +coming from the master are rejected. + + If a worker can't continue decoding for any cause (for example lack of +memory or finding a split tar member at the beginning of a lzip member), it +requests mastership to print an error and terminate the program. Only if +some other worker requests mastership in a previous lzip member can this +error be avoided. + + +File: tarlz.info, Node: Multi-threaded decoding, Next: Minimum archive sizes, Prev: Program design, Up: Top + +7 Limitations of parallel tar decoding +************************************** + +Safely decoding an arbitrary tar archive in parallel is only possible if one +decodes the headers sequentially first. For example, if a tar archive +containing another tar archive is decoded starting from some position other +than the beginning, there is no way to know if the first header found there +belongs to the outer tar archive or to the inner tar archive. Tar is a +format inherently serial; it was designed for tapes. + + The pax format is even more serial than the ustar format. 
Two headers +need to be decoded sequentially for each file. The extended header may even +need parsing to reveal something as basic as file size. If a thread decodes +the ustar header skipping the preceding extended header, it may extract a +file of incorrect size at the wrong place. Moreover, a pax archive with +global headers can't be decoded in parallel because each thread can't know +about the global headers decoded by other threads. + + In the case of compressed tar archives, the start of each compressed +block determines one point through which the tar archive can be decoded in +parallel. Therefore, in tar.lz archives the decoding operations can't be +parallelized if the tar members are not aligned with the lzip members. Tar +archives compressed with plzip can't be decoded in parallel because tar and +plzip do not have a way to align both sets of members. Certainly one can +decompress one such archive with a multi-threaded tool like plzip, but the +increase in speed is not as large as it could be because plzip must +serialize the decompressed data and pass them to tar, which decodes them +sequentially, one tar member at a time. + + On the other hand, if the tar.lz archive is created with a tool like +tarlz, which can guarantee the alignment between tar members and lzip +members because it controls both archiving and compression, then the lzip +format becomes an indexed layer on top of the tar archive which makes +possible decoding it safely in parallel. + + Tarlz is able to automatically decode aligned and unaligned multimember +tar.lz archives, keeping backwards compatibility. If tarlz finds a member +misalignment during multi-threaded decoding, it switches to single-threaded +mode and continues decoding the archive. 
+ + If the files in the archive are large, multi-threaded '--list' on a +regular (seekable) tar.lz archive can be hundreds of times faster than +sequential '--list' because, in addition to using several processors, it +only needs to decompress part of each lzip member. See the following +example listing the Silesia corpus on a dual core machine: + + tarlz -9 --no-solid -cf silesia.tar.lz silesia + time lzip -cd silesia.tar.lz | tar -tf - (5.032s) + time plzip -cd silesia.tar.lz | tar -tf - (3.256s) + time tarlz -tf silesia.tar.lz (0.020s) + + On the other hand, multi-threaded '--list' won't detect corruption in +the tar member data because it only decodes the part of each lzip member +corresponding to the tar member header. This is another reason why the tar +headers must provide their own integrity checking. + + +7.1 Limitations of multi-threaded extraction +============================================ + +Multi-threaded extraction may produce different output than single-threaded +extraction in some cases: + + During multi-threaded extraction, several independent threads are +simultaneously reading the archive and creating files in the file system. +The archive is not read sequentially. As a consequence, any error or +weirdness in the archive (like a corrupt member or an end-of-archive block +in the middle of the archive) won't be usually detected until part of the +archive beyond that point has been processed. + + If the archive contains two or more tar members with the same name, +single-threaded extraction extracts the members in the order they appear in +the archive and leaves in the file system the last version of the file. But +multi-threaded extraction may extract the members in any order and leave in +the file system any version of the file nondeterministically. It is +unspecified which of the tar members is extracted. 
+ + If the same file is extracted through several paths (different member +names resolve to the same file in the file system), the result is undefined. +(Probably the resulting file will be mangled). + + Extraction of a hard link may fail if it is extracted before the file it +links to. + + +File: tarlz.info, Node: Minimum archive sizes, Next: Examples, Prev: Multi-threaded decoding, Up: Top + +8 Minimum archive sizes required for multi-threaded block compression +********************************************************************* + +When creating or appending to a compressed archive using multi-threaded +block compression, tarlz puts tar members together in blocks and compresses +as many blocks simultaneously as worker threads are chosen, creating a +multimember compressed archive. + + For this to work as expected (and roughly multiply the compression speed +by the number of available processors), the uncompressed archive must be at +least as large as the number of worker threads times the block size (*note +--data-size::). Else some processors will not get any data to compress, and +compression will be proportionally slower. The maximum speed increase +achievable on a given archive is limited by the ratio +(uncompressed_size / data_size). For example, a tarball the size of gcc or +linux will scale up to 10 or 14 processors at level -9. 
+ + The following table shows the minimum uncompressed archive size needed +for full use of N processors at a given compression level, using the default +data size for each level: + +Processors 2 4 8 16 64 256 +------------------------------------------------------------------ +Level +-0 2 MiB 4 MiB 8 MiB 16 MiB 64 MiB 256 MiB +-1 4 MiB 8 MiB 16 MiB 32 MiB 128 MiB 512 MiB +-2 6 MiB 12 MiB 24 MiB 48 MiB 192 MiB 768 MiB +-3 8 MiB 16 MiB 32 MiB 64 MiB 256 MiB 1 GiB +-4 12 MiB 24 MiB 48 MiB 96 MiB 384 MiB 1.5 GiB +-5 16 MiB 32 MiB 64 MiB 128 MiB 512 MiB 2 GiB +-6 32 MiB 64 MiB 128 MiB 256 MiB 1 GiB 4 GiB +-7 64 MiB 128 MiB 256 MiB 512 MiB 2 GiB 8 GiB +-8 96 MiB 192 MiB 384 MiB 768 MiB 3 GiB 12 GiB +-9 128 MiB 256 MiB 512 MiB 1 GiB 4 GiB 16 GiB + + +File: tarlz.info, Node: Examples, Next: Problems, Prev: Minimum archive sizes, Up: Top + +9 A small tutorial with examples +******************************** + +Example 1: Create a multimember compressed archive 'archive.tar.lz' +containing files 'a', 'b' and 'c'. + + tarlz -cf archive.tar.lz a b c + + +Example 2: Append files 'd' and 'e' to the multimember compressed archive +'archive.tar.lz'. + + tarlz -rf archive.tar.lz d e + + +Example 3: Create a solidly compressed appendable archive 'archive.tar.lz' +containing files 'a', 'b' and 'c'. Then append files 'd' and 'e' to the +archive. + + tarlz --asolid -cf archive.tar.lz a b c + tarlz --asolid -rf archive.tar.lz d e + + +Example 4: Create a compressed appendable archive containing directories +'dir1', 'dir2' and 'dir3' with a separate lzip member per directory. Then +append files 'a', 'b', 'c', 'd' and 'e' to the archive, all of them +contained in a single lzip member. The resulting archive 'archive.tar.lz' +contains 5 lzip members (including the end-of-archive member). + + tarlz --dsolid -cf archive.tar.lz dir1 dir2 dir3 + tarlz --asolid -rf archive.tar.lz a b c d e + + +Example 5: Create a solidly compressed archive 'archive.tar.lz' containing +files 'a', 'b' and 'c'. 
Note that no more files can be later appended to +the archive. + + tarlz --solid -cf archive.tar.lz a b c + + +Example 6: Extract all files from archive 'archive.tar.lz'. + + tarlz -xf archive.tar.lz + + +Example 7: Extract files 'a' and 'c', and the whole tree under directory +'dir1' from archive 'archive.tar.lz'. + + tarlz -xf archive.tar.lz a c dir1 + + +Example 8: Copy the contents of directory 'sourcedir' to the directory +'destdir'. + + tarlz -C sourcedir --uncompressed -cf - . | tarlz -C destdir -xf - + + +Example 9: Compress the existing POSIX archive 'archive.tar' and write the +output to 'archive.tar.lz'. Compress each member individually for maximum +availability. (If one member in the compressed archive gets damaged, the +other members can still be extracted). + + tarlz -z --no-solid archive.tar + + +Example 10: Compress the archive 'archive.tar' and write the output to +'foo.tar.lz'. + + tarlz -z -o foo.tar.lz archive.tar + + +Example 11: Concatenate and compress two archives 'archive1.tar' and +'archive2.tar', and write the output to 'foo.tar.lz'. + + tarlz -A archive1.tar archive2.tar | tarlz -z -o foo.tar.lz + + +File: tarlz.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top + +10 Reporting bugs +***************** + +There are probably bugs in tarlz. There are certainly errors and omissions +in this manual. If you report them, they will get fixed. If you don't, no +one will ever know about them and they will remain unfixed for all +eternity, if not longer. + + If you find a bug in tarlz, please send electronic mail to +<lzip-bug@nongnu.org>. Include the version number, which you can find by +running 'tarlz --version' and 'tarlz -v --check-lib'. + + +File: tarlz.info, Node: Concept index, Prev: Problems, Up: Top + +Concept index +************* + + +* Menu: + +* Amendments to pax format: Amendments to pax format. (line 6) +* bugs: Problems. (line 6) +* examples: Examples. (line 6) +* file format: File format. 
(line 6) +* getting help: Problems. (line 6) +* introduction: Introduction. (line 6) +* invoking: Invoking tarlz. (line 6) +* minimum archive sizes: Minimum archive sizes. (line 6) +* options: Invoking tarlz. (line 6) +* parallel tar decoding: Multi-threaded decoding. (line 6) +* portable character set: Portable character set. (line 6) +* program design: Program design. (line 6) +* usage: Invoking tarlz. (line 6) +* version: Invoking tarlz. (line 6) + + + +Tag Table: +Node: Top216 +Node: Introduction1210 +Node: Invoking tarlz4029 +Ref: --data-size12880 +Ref: --bsolid17192 +Node: Portable character set22788 +Node: File format23431 +Ref: key_crc3230188 +Ref: ustar-uid-gid33452 +Ref: ustar-mtime34254 +Node: Amendments to pax format36254 +Ref: crc3236963 +Ref: flawed-compat38274 +Node: Program design42364 +Node: Multi-threaded decoding46289 +Ref: mt-extraction49570 +Node: Minimum archive sizes50876 +Node: Examples53014 +Node: Problems55381 +Node: Concept index55936 + +End Tag Table + + +Local Variables: +coding: iso-8859-15 +End: diff --git a/doc/tarlz.texi b/doc/tarlz.texi new file mode 100644 index 0000000..5bdd2af --- /dev/null +++ b/doc/tarlz.texi @@ -0,0 +1,1338 @@ +\input texinfo @c -*-texinfo-*- +@c %**start of header +@setfilename tarlz.info +@documentencoding ISO-8859-15 +@settitle Tarlz Manual +@finalout +@c %**end of header + +@set UPDATED 23 September 2022 +@set VERSION 0.23 + +@dircategory Archiving +@direntry +* Tarlz: (tarlz). Archiver with multimember lzip compression +@end direntry + + +@ifnothtml +@titlepage +@title Tarlz +@subtitle Archiver with multimember lzip compression +@subtitle for Tarlz version @value{VERSION}, @value{UPDATED} +@author by Antonio Diaz Diaz + +@page +@vskip 0pt plus 1filll +@end titlepage + +@contents +@end ifnothtml + +@ifnottex +@node Top +@top + +This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}). 
+ +@menu +* Introduction:: Purpose and features of tarlz +* Invoking tarlz:: Command line interface +* Portable character set:: POSIX portable filename character set +* File format:: Detailed format of the compressed archive +* Amendments to pax format:: The reasons for the differences with pax +* Program design:: Internal structure of tarlz +* Multi-threaded decoding:: Limitations of parallel tar decoding +* Minimum archive sizes:: Sizes required for full multi-threaded speed +* Examples:: A small tutorial with examples +* Problems:: Reporting bugs +* Concept index:: Index of concepts +@end menu + +@sp 1 +Copyright @copyright{} 2013-2022 Antonio Diaz Diaz. + +This manual is free documentation: you have unlimited permission to copy, +distribute, and modify it. +@end ifnottex + + +@node Introduction +@chapter Introduction +@cindex introduction + +@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a massively parallel +(multi-threaded) combined implementation of the tar archiver and the +@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. Tarlz uses the +compression library @uref{http://www.nongnu.org/lzip/lzlib.html,,lzlib}. + +Tarlz creates tar archives using a simplified and safer variant of the POSIX +pax format compressed in lzip format, keeping the alignment between tar +members and lzip members. The resulting multimember tar.lz archive is fully +backward compatible with standard tar tools like GNU tar, which treat it +like any other tar.lz archive. Tarlz can append files to the end of such +compressed archives. + +Keeping the alignment between tar members and lzip members has two +advantages. It adds an indexed lzip layer on top of the tar archive, making +it possible to decode the archive safely in parallel. It also minimizes the +amount of data lost in case of corruption. Compressing a tar archive with +plzip may even double the amount of files lost for each lzip member damaged +because it does not keep the members aligned. 
+ +Tarlz can create tar archives with five levels of compression granularity: +per file (---no-solid), per block (---bsolid, default), per directory +(---dsolid), appendable solid (---asolid), and solid (---solid). It can also +create uncompressed tar archives. + +@noindent +Of course, compressing each file (or each directory) individually can't +achieve a compression ratio as high as compressing solidly the whole tar +archive, but it has the following advantages: + +@itemize @bullet +@item +The resulting multimember tar.lz archive can be decompressed in +parallel, multiplying the decompression speed. + +@item +New members can be appended to the archive (by removing the +end-of-archive member), and unwanted members can be deleted from the +archive. Just like an uncompressed tar archive. + +@item +It is a safe POSIX-style backup format. In case of corruption, tarlz +can extract all the undamaged members from the tar.lz archive, +skipping over the damaged members, just like the standard +(uncompressed) tar. Moreover, the option @samp{--keep-damaged} can be used +to recover as much data as possible from each damaged member, and +lziprecover can be used to recover some of the damaged members. + +@item +A multimember tar.lz archive is usually smaller than the corresponding +solidly compressed tar.gz archive, except when individually +compressing files smaller than about @w{32 KiB}. +@end itemize + +Tarlz protects the extended records with a Cyclic Redundancy Check (CRC) in +a way compatible with standard tar tools. @xref{crc32}. + +Tarlz does not understand other tar formats like @samp{gnu}, @samp{oldgnu}, +@samp{star} or @samp{v7}. The command +@w{@samp{tarlz -tf archive.tar.lz > /dev/null}} can be used to verify that +the format of the archive is compatible with tarlz. 
+ + +@node Invoking tarlz +@chapter Invoking tarlz +@cindex invoking +@cindex options +@cindex usage +@cindex version + +The format for running tarlz is: + +@example +tarlz @var{operation} [@var{options}] [@var{files}] +@end example + +@noindent +All operations except @samp{--concatenate} and @samp{--compress} operate on +whole trees if any @var{file} is a directory. All operations except +@samp{--compress} overwrite output files without warning. If no archive is +specified, tarlz tries to read it from standard input or write it to +standard output. Tarlz refuses to read archive data from a terminal or write +archive data to a terminal. Tarlz detects when the archive being created or +enlarged is among the files to be archived, appended, or concatenated, and +skips it. + +Tarlz does not use absolute file names nor file names above the current +working directory (perhaps changed by option @samp{-C}). On archive creation +or appending tarlz archives the files specified, but removes from member +names any leading and trailing slashes and any file name prefixes containing +a @samp{..} component. On extraction, leading and trailing slashes are also +removed from member names, and archive members containing a @samp{..} +component in the file name are skipped. Tarlz does not follow symbolic links +during extraction; not even symbolic links replacing intermediate +directories. + +On extraction and listing, tarlz removes leading @samp{./} strings from +member names in the archive or given in the command line, so that +@w{@samp{tarlz -xf foo ./bar baz}} extracts members @samp{bar} and +@samp{./baz} from archive @samp{foo}. + +If several compression levels or @samp{--*solid} options are given, the last +setting is used. For example @w{@samp{-9 --solid --uncompressed -1}} is +equivalent to @w{@samp{-1 --solid}}. + +tarlz supports the following operations: + +@table @code +@item --help +Print an informative help message describing the options and exit. 
+ +@item -V +@itemx --version +Print the version number of tarlz on the standard output and exit. +This version number should be included in all bug reports. + +@item -A +@itemx --concatenate +Append one or more archives to the end of an archive. If no archive is +specified with the option @samp{-f}, the input archives are concatenated to +standard output. All the archives involved must be regular (seekable) files, +and must be either all compressed or all uncompressed. Compressed and +uncompressed archives can't be mixed. Compressed archives must be +multimember lzip files with the two end-of-archive blocks plus any zero +padding contained in the last lzip member of each archive. The intermediate +end-of-archive blocks are removed as each new archive is concatenated. If +the archive is uncompressed, tarlz parses tar headers until it finds the +end-of-archive blocks. Exit with status 0 without modifying the archive if +no @var{files} have been specified. + +Concatenating archives containing files in common results in two or more tar +members with the same name in the resulting archive, which may produce +nondeterministic behavior during multi-threaded extraction. +@xref{mt-extraction}. + +@item -c +@itemx --create +Create a new archive from @var{files}. + +@item -d +@itemx --diff +Compare and report differences between archive and file system. For each tar +member in the archive, verify that the corresponding file in the file system +exists and is of the same type (regular file, directory, etc). Report on +standard output the differences found in type, mode (permissions), owner and +group IDs, modification time, file size, file contents (of regular files), +target (of symlinks) and device number (of block/character special files). + +As tarlz removes leading slashes from member names, the option @samp{-C} may +be used in combination with @samp{--diff} when absolute file names were used +on archive creation: @w{@samp{tarlz -C / -d}}. 
Alternatively, tarlz may be +run from the root directory to perform the comparison. + +@item --delete +Delete files and directories from an archive in place. It currently can +delete only from uncompressed archives and from archives with files +compressed individually (@samp{--no-solid} archives). Note that files of +about @samp{--data-size} or larger are compressed individually even if +@samp{--bsolid} is used, and can therefore be deleted. Tarlz takes care to +not delete a tar member unless it is possible to do so. For example it won't +try to delete a tar member that is not compressed individually. Even in the +case of finding a corrupt member after having deleted some member(s), tarlz +stops and copies the rest of the file as soon as corruption is found, +leaving it just as corrupt as it was, but not worse. + +To delete a directory without deleting the files under it, use +@w{@samp{tarlz --delete -f foo --exclude='dir/*' dir}}. Deleting in place +may be dangerous. A corrupt archive, a power cut, or an I/O error may cause +data loss. + +@item -r +@itemx --append +Append files to the end of an archive. The archive must be a regular +(seekable) file either compressed or uncompressed. Compressed members can't +be appended to an uncompressed archive, nor vice versa. If the archive is +compressed, it must be a multimember lzip file with the two end-of-archive +blocks plus any zero padding contained in the last lzip member of the +archive. It is possible to append files to an archive with a different +compression granularity. Appending works as follows; first the +end-of-archive blocks are removed, then the new members are appended, and +finally two new end-of-archive blocks are appended to the archive. If the +archive is uncompressed, tarlz parses and skips tar headers until it finds +the end-of-archive blocks. Exit with status 0 without modifying the archive +if no @var{files} have been specified. 
+ +Appending files already present in the archive results in two or more tar +members with the same name, which may produce nondeterministic behavior +during multi-threaded extraction. @xref{mt-extraction}. + +@item -t +@itemx --list +List the contents of an archive. If @var{files} are given, list only the +@var{files} given. + +@item -x +@itemx --extract +Extract files from an archive. If @var{files} are given, extract only the +@var{files} given. Else extract all the files in the archive. To extract a +directory without extracting the files under it, use +@w{@samp{tarlz -xf foo --exclude='dir/*' dir}}. Tarlz removes files and +empty directories unconditionally before extracting over them. Other than +that, it will not make any special effort to extract a file over an +incompatible type of file. For example, extracting a file over a non-empty +directory will usually fail. + +@item -z +@itemx --compress +Compress existing POSIX tar archives aligning the lzip members to the tar +members with choice of granularity (---bsolid by default, ---dsolid works +like ---asolid). The input archives are kept unchanged. Existing compressed +archives are not overwritten. A hyphen @samp{-} used as the name of an input +archive reads from standard input and writes to standard output (unless the +option @samp{--output} is used). Tarlz can be used as compressor for GNU tar +using a command like @w{@samp{tar -c -Hustar foo | tarlz -z -o foo.tar.lz}}. +Note that tarlz only works reliably on archives without global headers, or +with global headers whose content can be ignored. + +The compression is reversible, including any garbage present after the +end-of-archive blocks. Tarlz stops parsing after the first end-of-archive +block is found, and then compresses the rest of the archive. Unless solid +compression is requested, the end-of-archive blocks are compressed in a lzip +member separated from the preceding members and from any non-zero garbage +following the end-of-archive blocks. 
@samp{--compress} implies plzip +argument style, not tar style. Each input archive is compressed to a file +with the extension @samp{.lz} added unless the option @samp{--output} is +used. When @samp{--output} is used, only one input archive can be specified. +@samp{-f} can't be used with @samp{--compress}. + +@item --check-lib +Compare the +@uref{http://www.nongnu.org/lzip/manual/lzlib_manual.html#Library-version,,version of lzlib} +used to compile tarlz with the version actually being used at run time and +exit. Report any differences found. Exit with error status 1 if differences +are found. A mismatch may indicate that lzlib is not correctly installed or +that a different version of lzlib has been installed after compiling tarlz. +Exit with error status 2 if LZ_API_VERSION and LZ_version_string don't +match. @w{@samp{tarlz -v --check-lib}} shows the version of lzlib being used +and the value of LZ_API_VERSION (if defined). +@ifnothtml +@xref{Library version,,,lzlib}. +@end ifnothtml + +@end table + +tarlz supports the following +@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}: +@ifnothtml +@xref{Argument syntax,,,arg_parser}. +@end ifnothtml + +@table @code +@anchor{--data-size} +@item -B @var{bytes} +@itemx --data-size=@var{bytes} +Set target size of input data blocks for the option @samp{--bsolid}. +@xref{--bsolid}. Valid values range from @w{8 KiB} to @w{1 GiB}. Default +value is two times the dictionary size, except for option @samp{-0} where it +defaults to @w{1 MiB}. @xref{Minimum archive sizes}. + +@item -C @var{dir} +@itemx --directory=@var{dir} +Change to directory @var{dir}. When creating or appending, the position of +each @samp{-C} option in the command line is significant; it will change the +current working directory for the following @var{files} until a new +@samp{-C} option appears in the command line. 
When extracting or comparing, +all the @samp{-C} options are executed in sequence before reading the +archive. Listing ignores any @samp{-C} options specified. @var{dir} is +relative to the then current working directory, perhaps changed by a +previous @samp{-C} option. + +Note that a process can only have one current working directory (CWD). +Therefore multi-threading can't be used to create an archive if a @samp{-C} +option appears after a relative file name in the command line. + +@item -f @var{archive} +@itemx --file=@var{archive} +Use archive file @var{archive}. A hyphen @samp{-} used as an @var{archive} +argument reads from standard input or writes to standard output. + +@item -h +@itemx --dereference +Follow symbolic links during archive creation, appending or comparison. +Archive or compare the files they point to instead of the links themselves. + +@item -n @var{n} +@itemx --threads=@var{n} +Set the number of (de)compression threads, overriding the system's default. +Valid values range from 0 to "as many as your system can support". A value +of 0 disables threads entirely. If this option is not used, tarlz tries to +detect the number of processors in the system and use it as default value. +@w{@samp{tarlz --help}} shows the system's default value. See the note about +multi-threaded archive creation in the option @samp{-C} above. + +Note that the number of usable threads is limited during compression to +@w{ceil( uncompressed_size / data_size )} (@pxref{Minimum archive sizes}), +and during decompression to the number of lzip members in the tar.lz +archive, which you can find by running @w{@samp{lzip -lv archive.tar.lz}}. + +@item -o @var{file} +@itemx --output=@var{file} +Write the compressed output to @var{file}. @w{@samp{-o -}} writes the +compressed output to standard output. Currently @samp{--output} only works +with @samp{--compress}. + +@item -p +@itemx --preserve-permissions +On extraction, set file permissions as they appear in the archive. 
This is +the default behavior when tarlz is run by the superuser. The default for +other users is to subtract the umask of the user running tarlz from the +permissions specified in the archive. + +@item -q +@itemx --quiet +Quiet operation. Suppress all messages. + +@item -v +@itemx --verbose +Verbosely list files processed. Further -v's (up to 4) increase the +verbosity level. + +@item -0 .. -9 +Set the compression level for @samp{--create}, @samp{--append}, and +@samp{--compress}. The default compression level is @samp{-6}. Like lzip, +tarlz also minimizes the dictionary size of the lzip members it creates, +reducing the amount of memory required for decompression. + +@multitable {Level} {Dictionary size} {Match length limit} +@item Level @tab Dictionary size @tab Match length limit +@item -0 @tab 64 KiB @tab 16 bytes +@item -1 @tab 1 MiB @tab 5 bytes +@item -2 @tab 1.5 MiB @tab 6 bytes +@item -3 @tab 2 MiB @tab 8 bytes +@item -4 @tab 3 MiB @tab 12 bytes +@item -5 @tab 4 MiB @tab 20 bytes +@item -6 @tab 8 MiB @tab 36 bytes +@item -7 @tab 16 MiB @tab 68 bytes +@item -8 @tab 24 MiB @tab 132 bytes +@item -9 @tab 32 MiB @tab 273 bytes +@end multitable + +@item --uncompressed +With @samp{--create}, don't compress the tar archive created. Create an +uncompressed tar archive instead. With @samp{--append}, don't compress the +new members appended to the tar archive. Compressed members can't be +appended to an uncompressed archive, nor vice versa. + +@item --asolid +When creating or appending to a compressed archive, use appendable solid +compression. All the files being added to the archive are compressed into a +single lzip member, but the end-of-archive blocks are compressed into a +separate lzip member. This creates a solidly compressed appendable archive. +Solid archives can't be created nor decoded in parallel. + +@anchor{--bsolid} +@item --bsolid +When creating or appending to a compressed archive, use block compression. 
+Tar members are compressed together in a lzip member until they approximate +a target uncompressed size. The size can't be exact because each solidly +compressed data block must contain an integer number of tar members. Block +compression is the default because it improves compression ratio for +archives with many files smaller than the block size. This option allows +tarlz to revert to default behavior if, for example, it is invoked through an +alias like @w{@samp{tar='tarlz --solid'}}. @xref{--data-size}, to set the +target block size. + +@item --dsolid +When creating or appending to a compressed archive, compress each file +specified in the command line separately in its own lzip member, and use +solid compression for each directory specified in the command line. The +end-of-archive blocks are compressed into a separate lzip member. This +creates a compressed appendable archive with a separate lzip member for each +file or top-level directory specified. + +@item --no-solid +When creating or appending to a compressed archive, compress each file +separately in its own lzip member. The end-of-archive blocks are compressed +into a separate lzip member. This creates a compressed appendable archive +with a lzip member for each file. + +@item --solid +When creating or appending to a compressed archive, use solid compression. +The files being added to the archive, along with the end-of-archive blocks, +are compressed into a single lzip member. The resulting archive is not +appendable. No more files can be later appended to the archive. Solid +archives can't be created nor decoded in parallel. + +@item --anonymous +Equivalent to @w{@samp{--owner=root --group=root}}. + +@item --owner=@var{owner} +When creating or appending, use @var{owner} for files added to the archive. +If @var{owner} is not a valid user name, it is decoded as a decimal numeric +user ID. + +@item --group=@var{group} +When creating or appending, use @var{group} for files added to the archive. 
+If @var{group} is not a valid group name, it is decoded as a decimal numeric +group ID. + +@item --exclude=@var{pattern} +Exclude files matching a shell pattern like @samp{*.o}. A file is considered +to match if any component of the file name matches. For example, @samp{*.o} +matches @samp{foo.o}, @samp{foo.o/bar} and @samp{foo/bar.o}. If +@var{pattern} contains a @samp{/}, it matches a corresponding @samp{/} in +the file name. For example, @samp{foo/*.o} matches @samp{foo/bar.o}. +Multiple @samp{--exclude} options can be specified. + +@item --ignore-ids +Make @samp{--diff} ignore differences in owner and group IDs. This option is +useful when comparing an @samp{--anonymous} archive. + +@item --ignore-overflow +Make @samp{--diff} ignore differences in mtime caused by overflow on 32-bit +systems with a 32-bit time_t. + +@item --keep-damaged +Don't delete partially extracted files. If a decompression error happens +while extracting a file, keep the partial data extracted. Use this option to +recover as much data as possible from each damaged member. It is recommended +to run tarlz in single-threaded mode (---threads=0) when using this option. + +@item --missing-crc +Exit with error status 2 if the CRC of the extended records is missing. When +this option is used, tarlz detects any corruption in the extended records +(only limited by CRC collisions). But note that a corrupt @samp{GNU.crc32} +keyword, for example @samp{GNU.crc33}, is reported as a missing CRC instead +of as a corrupt record. This misleading @w{@samp{Missing CRC}} message is +the consequence of a flaw in the POSIX pax format; i.e., the lack of a +mandatory check sequence of the extended records. @xref{crc32}. + +@item --mtime=@var{date} +When creating or appending, use @var{date} as the modification time for +files added to the archive instead of their actual modification times. 
The +value of @var{date} may be either @samp{@@} followed by the number of +seconds since (or before) the epoch, or a date in format +@w{@samp{[-]YYYY-MM-DD HH:MM:SS}} or @samp{[-]YYYY-MM-DDTHH:MM:SS}, or the +name of an existing reference file starting with @samp{.} or @samp{/} whose +modification time is used. The time of day @samp{HH:MM:SS} in the date +format is optional and defaults to @samp{00:00:00}. The epoch is +@w{@samp{1970-01-01 00:00:00 UTC}}. Negative seconds or years define a +modification time before the epoch. + +@item --out-slots=@var{n} +Number of @w{1 MiB} output packets buffered per worker thread during +multi-threaded creation or appending to compressed archives. Increasing the +number of packets may increase compression speed if the files being archived +are larger than @w{64 MiB} compressed, but requires more memory. Valid +values range from 1 to 1024. The default value is 64. + +@item --warn-newer +During archive creation, warn if any file being archived has a modification +time newer than the archive creation time. This option may slow archive +creation somewhat because it makes an extra call to @samp{stat} after +archiving each file, but it guarantees that file contents were not modified +during the creation of the archive. Note that the file must be at least one +second newer than the archive for it to be detected as newer. + +@ignore +@item --permissive +Allow some violations of the archive format, like consecutive extended +headers preceding a ustar header, or several records with the same +keyword appearing in the same block of extended records. +@end ignore + +@end table + +Exit status: 0 for a normal exit, 1 for environmental problems +(file not found, files differ, invalid command line options, I/O errors, +etc), 2 to indicate a corrupt or invalid input file, 3 for an internal +consistency error (e.g., bug) which caused tarlz to panic. 
+ + +@node Portable character set +@chapter POSIX portable filename character set +@cindex portable character set + +The set of characters from which portable file names are constructed. + +@example +A B C D E F G H I J K L M N O P Q R S T U V W X Y Z +a b c d e f g h i j k l m n o p q r s t u v w x y z +0 1 2 3 4 5 6 7 8 9 . _ - +@end example + +The last three characters are the period, underscore, and hyphen-minus +characters, respectively. + +File names are identifiers. Therefore, archiving works better when file +names use only the portable character set without spaces added. + + +@node File format +@chapter File format +@cindex file format + +In the diagram below, a box like this: + +@verbatim ++---+ +| | <-- the vertical bars might be missing ++---+ +@end verbatim + +represents one byte; a box like this: + +@verbatim ++==============+ +| | ++==============+ +@end verbatim + +represents a variable number of bytes or a fixed but large number of +bytes (for example 512). + +@sp 1 +A tar.lz file consists of a series of lzip members (compressed data sets). +The members simply appear one after another in the file, with no additional +information before, between, or after them. + +Each lzip member contains one or more tar members in a simplified POSIX pax +interchange format. The only pax typeflag value supported by tarlz (in +addition to the typeflag values defined by the ustar format) is @samp{x}. +The pax format is an extension on top of the ustar format that removes the +size limitations of the ustar format. + +Each tar member contains one file archived, and is represented by the +following sequence: + +@itemize @bullet +@item +An optional extended header block followed by one or more blocks that +contain the extended header records as if they were the contents of a file; +i.e., the extended header records are included as the data for this header +block. This header block is of the form described in pax header block, with +a typeflag value of @samp{x}. 
+ +@item +A header block in ustar format that describes the file. Any fields defined +in the preceding optional extended header records override the associated +fields in this header block for this file. + +@item +Zero or more blocks that contain the contents of the file. +@end itemize + +Each tar member must be contiguously stored in a lzip member for the +parallel decoding operations like @samp{--list} to work. If any tar member +is split over two or more lzip members, the archive must be decoded +sequentially. @xref{Multi-threaded decoding}. + +At the end of the archive file there are two 512-byte blocks filled with +binary zeros, interpreted as an end-of-archive indicator. These EOA blocks +are either compressed in a separate lzip member or compressed along with the +tar members contained in the last lzip member. For a compressed archive to +be recognized by tarlz as appendable, the last lzip member must contain +between 512 and 32256 zeros alone. + +The diagram below shows the correspondence between each tar member (formed +by one or two headers plus optional data) in the tar archive and each +@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#File-format,,lzip member} +in the resulting multimember tar.lz archive, when per file compression is +used: +@ifnothtml +@xref{File format,,,lzip}. 
+@end ifnothtml + +@verbatim +tar ++========+======+=================+===============+========+======+========+ +| header | data | extended header | extended data | header | data | EOA | ++========+======+=================+===============+========+======+========+ + +tar.lz ++===============+=================================================+========+ +| member | member | member | ++===============+=================================================+========+ +@end verbatim + +@ignore +When @samp{--permissive} is used, the following violations of the +archive format are allowed:@* +If several extended headers precede an ustar header, only the last +extended header takes effect. The other extended headers are ignored. +Similarly, if several records with the same keyword appear in the same +block of extended records, only the last record for the repeated keyword +takes effect. The other records for the repeated keyword are ignored.@* +A global header inserted between an extended header and an ustar header.@* +An extended header just before the end-of-archive blocks. +@end ignore + +@sp 1 +@section Pax header block + +The pax header block is identical to the ustar header block described below +except that the typeflag has the value @samp{x} (extended). The field +@samp{size} is the size of the extended header data in bytes. Most other +fields in the pax header block are zeroed on archive creation to prevent +trouble if the archive is read by an ustar tool, and are ignored by tarlz on +archive extraction. @xref{flawed-compat}. + +The pax extended header data consists of one or more records, each of +them constructed as follows:@* +@w{@samp{"%d %s=%s\n", <length>, <keyword>, <value>}} + +The fields <length> and <keyword> in the record must be limited to the +portable character set (@pxref{Portable character set}). The field <length> +contains the decimal length of the record in bytes, including the trailing +newline. 
The field <value> is stored as-is, without conversion to UTF-8 nor +any other transformation. The fields are separated by the ASCII characters +space, equal-sign, and newline. + +These are the <keyword> values currently supported by tarlz: + +@table @code +@item atime +The signed decimal representation of the access time of the following file +in seconds since (or before) the epoch, obtained from the function +@samp{stat}. The atime record is created only for files with a modification +time outside of the ustar range. @xref{ustar-mtime}. + +@item gid +The unsigned decimal representation of the group ID of the group that owns +the following file. The gid record is created only for files with a group ID +greater than 2_097_151 (octal 7777777). @xref{ustar-uid-gid}. + +@item linkpath +The file name of a link being created to another file, of any type, +previously archived. This record overrides the field @samp{linkname} in the +following ustar header block. The following ustar header block determines +the type of link created. If typeflag of the following header block is 1, it +will be a hard link. If typeflag is 2, it will be a symbolic link and the +linkpath value will be used as the contents of the symbolic link. The +linkpath record is created only for links with a link name that does not fit +in the space provided by the ustar header. + +@item mtime +The signed decimal representation of the modification time of the following +file in seconds since (or before) the epoch, obtained from the function +@samp{stat}. This record overrides the field @samp{mtime} in the following +ustar header block. The mtime record is created only for files with a +modification time outside of the ustar range. @xref{ustar-mtime}. + +@item path +The file name of the following file. This record overrides the fields +@samp{name} and @samp{prefix} in the following ustar header block. 
The path +record is created for files with a name that does not fit in the space +provided by the ustar header, but is also created for files that require any +other extended record so that the fields @samp{name} and @samp{prefix} in +the following ustar header block can be zeroed. + +@item size +The size of the file in bytes, expressed as a decimal number using digits +from the ISO/IEC 646:1991 (ASCII) standard. This record overrides the field +@samp{size} in the following ustar header block. The size record is created +only for files with a size value greater than 8_589_934_591 +@w{(octal 77777777777)}; that is, @w{8 GiB} (2^33 bytes) or larger. + +@item uid +The unsigned decimal representation of the user ID of the file owner of the +following file. The uid record is created only for files with a user ID +greater than 2_097_151 (octal 7777777). @xref{ustar-uid-gid}. + +@anchor{key_crc32} +@item GNU.crc32 +CRC32-C (Castagnoli) of the extended header data excluding the 8 bytes +representing the CRC <value> itself. The <value> is represented as 8 +hexadecimal digits in big endian order, +@w{@samp{22 GNU.crc32=00000000\n}}. The keyword of the CRC record is +protected by the CRC to guarantee that corruption is always detected when +using @samp{--missing-crc} (except in case of CRC collision). A CRC was +chosen because a checksum is too weak for a potentially large list of +variable sized records. A checksum can't detect simple errors like the +swapping of two bytes. + +@end table + +At verbosity level 1 or higher tarlz prints a diagnostic for each unknown +extended header keyword found in an archive, once per keyword. + +@sp 1 +@section Ustar header block + +The ustar header block has a length of 512 bytes and is structured as +shown in the following table. All lengths and offsets are in decimal. 
+ +@multitable {Field Name} {Offset} {Length (in bytes)} +@item Field Name @tab Offset @tab Length (in bytes) +@item name @tab 0 @tab 100 +@item mode @tab 100 @tab 8 +@item uid @tab 108 @tab 8 +@item gid @tab 116 @tab 8 +@item size @tab 124 @tab 12 +@item mtime @tab 136 @tab 12 +@item chksum @tab 148 @tab 8 +@item typeflag @tab 156 @tab 1 +@item linkname @tab 157 @tab 100 +@item magic @tab 257 @tab 6 +@item version @tab 263 @tab 2 +@item uname @tab 265 @tab 32 +@item gname @tab 297 @tab 32 +@item devmajor @tab 329 @tab 8 +@item devminor @tab 337 @tab 8 +@item prefix @tab 345 @tab 155 +@end multitable + +All characters in the header block are coded using the ISO/IEC 646:1991 +(ASCII) standard, except in fields storing names for files, users, and +groups. For maximum portability between implementations, names should only +contain characters from the portable character set (@pxref{Portable +character set}), but if an implementation supports the use of characters +outside of @samp{/} and the portable character set in names for files, +users, and groups, tarlz will use the byte values in these names unmodified. + +The fields @samp{name}, @samp{linkname}, and @samp{prefix} are +null-terminated character strings except when all characters in the array +contain non-null characters including the last character. + +The fields @samp{name} and @samp{prefix} produce the file name. A new file +name is formed, if prefix is not an empty string (its first character is not +null), by concatenating prefix (up to the first null character), a slash +character, and name; otherwise, name is used alone. In either case, name is +terminated at the first null character. If prefix begins with a null +character, it is ignored. In this manner, file names of at most 256 +characters can be supported. If a file name does not fit in the space +provided, an extended record is used to store the file name. + +The field @samp{linkname} does not use the prefix to produce a file name. 
If +the link name does not fit in the 100 characters provided, an extended +record is used to store the link name. + +The field @samp{mode} provides 12 access permission bits. The following +table shows the symbolic name of each bit and its octal value: + +@multitable {Bit Name} {Value} {Bit Name} {Value} {Bit Name} {Value} +@headitem Bit Name @tab Value @tab Bit Name @tab Value @tab Bit Name @tab Value +@item S_ISUID @tab 04000 @tab S_ISGID @tab 02000 @tab S_ISVTX @tab 01000 +@item S_IRUSR @tab 00400 @tab S_IWUSR @tab 00200 @tab S_IXUSR @tab 00100 +@item S_IRGRP @tab 00040 @tab S_IWGRP @tab 00020 @tab S_IXGRP @tab 00010 +@item S_IROTH @tab 00004 @tab S_IWOTH @tab 00002 @tab S_IXOTH @tab 00001 +@end multitable + +@anchor{ustar-uid-gid} +The fields @samp{uid} and @samp{gid} are the user and group IDs of the owner +and group of the file, respectively. If the file uid or gid are greater than +2_097_151 (octal 7777777), an extended record is used to store the uid or gid. + +The field @samp{size} contains the octal representation of the size of the +file in bytes. If the field @samp{typeflag} specifies a file of type '0' +(regular file) or '7' (high performance regular file), the number of logical +records following the header is @w{(size / 512)} rounded to the next +integer. For all other values of typeflag, tarlz either sets the size field +to 0 or ignores it, and does not store or expect any logical records +following the header. If the file size is larger than 8_589_934_591 bytes +@w{(octal 77777777777)}, an extended record is used to store the file size. + +@anchor{ustar-mtime} +The field @samp{mtime} contains the octal representation of the modification +time of the file at the time it was archived, obtained from the function +@samp{stat}. If the modification time is negative or larger than +8_589_934_591 @w{(octal 77777777777)} seconds since the epoch, an extended +record is used to store the modification time. 
The ustar range of mtime goes +from @w{@samp{1970-01-01 00:00:00 UTC}} to @w{@samp{2242-03-16 12:56:31 UTC}}. + +The field @samp{chksum} contains the octal representation of the value of +the simple sum of all bytes in the header logical record. Each byte in the +header is treated as an unsigned value. When calculating the checksum, the +chksum field is treated as if it were all space characters. + +The field @samp{typeflag} contains a single character specifying the type of +file archived: + +@table @code +@item '0' +Regular file. + +@item '1' +Hard link to another file, of any type, previously archived. Hard links must +not contain file data. + +@item '2' +Symbolic link. + +@item '3', '4' +Character special file and block special file respectively. In this case the +fields @samp{devmajor} and @samp{devminor} contain information defining the +device in unspecified format. + +@item '5' +Directory. + +@item '6' +FIFO special file. + +@item '7' +Reserved to represent a file to which an implementation has associated some +high-performance attribute (contiguous file). Tarlz treats this type of file +as a regular file (type 0). + +@end table + +The field @samp{magic} contains the ASCII null-terminated string "ustar". +The field @samp{version} contains the characters "00" (0x30,0x30). The +fields @samp{uname} and @samp{gname} are null-terminated character strings +except when all characters in the array contain non-null characters +including the last character. Each numeric field contains a leading space- +or zero-filled, optionally null-terminated octal number using digits from +the ISO/IEC 646:1991 (ASCII) standard. Tarlz is able to decode numeric +fields 1 byte longer than standard ustar by not requiring a terminating null +character. 
+ + +@node Amendments to pax format +@chapter The reasons for the differences with pax +@cindex Amendments to pax format + +Tarlz creates safe archives that allow the reliable detection of invalid or +corrupt metadata during decoding even when the integrity checking of lzip +can't be used because the lzip members are only decompressed partially, as +it happens in parallel @samp{--diff}, @samp{--list}, and @samp{--extract}. +In order to achieve this goal and avoid some other flaws in the pax format, +tarlz makes some changes to the variant of the pax format that it uses. This +chapter describes these changes and the concrete reasons to implement them. + +@sp 1 +@anchor{crc32} +@section Add a CRC of the extended records + +The POSIX pax format has a serious flaw. The metadata stored in pax extended +records are not protected by any kind of check sequence. Corruption in a +long file name may cause the extraction of the file in the wrong place +without warning. Corruption in a large file size may cause the truncation of +the file or the appending of garbage to the file, both followed by a +spurious warning about a corrupt header far from the place of the undetected +corruption. + +Metadata like file name and file size must always be protected in an archive +format because of the adverse effects of undetected corruption in them, +potentially much worse than undetected corruption in the data. Even more so +in the case of pax because the amount of metadata it stores is potentially +large, making undetected corruption and archiver misbehavior more probable. + +Headers and metadata must be protected separately from data because the +integrity checking of lzip may not be able to detect the corruption before +the metadata has been used, for example, to create a new file in the wrong +place. + +Because of the above, tarlz protects the extended records with a Cyclic +Redundancy Check (CRC) in a way compatible with standard tar tools. +@xref{key_crc32}. 
+ +@sp 1 +@anchor{flawed-compat} +@section Remove flawed backward compatibility + +In order to allow the extraction of pax archives by a tar utility conforming +to the POSIX-2:1993 standard, POSIX.1-2008 recommends selecting extended +header field values that allow such tar to create a regular file containing +the extended header records as data. This approach is broken because if the +extended header is needed because of a long file name, the fields +@samp{name} and @samp{prefix} will be unable to contain the full file name. +(Some tar implementations store the truncated name in the field @samp{name} +alone, truncating the name to only 100 bytes instead of 256). Therefore the +files corresponding to both the extended header and the overridden ustar +header will be extracted using truncated file names, perhaps overwriting +existing files or directories. It may be a security risk to extract a file +with a truncated file name. + +To avoid this problem, tarlz writes extended headers with all fields zeroed +except @samp{size} (which contains the size of the extended records), +@samp{chksum}, @samp{typeflag}, @samp{magic}, and @samp{version}. In +particular, tarlz sets the fields @samp{name} and @samp{prefix} to zero. +This prevents old tar programs from extracting the extended records as a +file in the wrong place. Tarlz also sets to zero those fields of the ustar +header overridden by extended records. Finally, tarlz skips members with +zeroed @samp{name} and @samp{prefix} when decoding, except when listing. +This is needed to detect certain format violations during parallel +extraction. + +If an extended header is required for any reason (for example a file size of +@w{8 GiB} or larger, or a link name longer than 100 bytes), tarlz also moves +the file name to the extended records to prevent an ustar tool from trying +to extract the file or link. 
This also makes it easier, during parallel decoding, +to detect a tar member split between two lzip members at the boundary +between the extended header and the ustar header. + +@sp 1 +@section As simple as possible (but not simpler) + +The tarlz format is mainly ustar. Extended pax headers are used only when +needed because the length of a file name or link name, or the size or other +attribute of a file exceeds the limits of the ustar format. Adding @w{1 KiB} +of extended header and records to each member just to save subsecond +timestamps seems wasteful for a backup format. Moreover, minimizing the +overhead may help recover the archive with lziprecover in case of +corruption. + +Global pax headers are tolerated, but not supported; they are parsed and +ignored. Some operations may not behave as expected if the archive contains +global headers. + +@sp 1 +@section Improve reproducibility + +Pax includes by default the process ID of the pax process in the ustar name +of the extended headers, making the archive not reproducible. Tarlz stores +the true name of the file just once, either in the ustar header or in the +extended records, making it easier to produce reproducible archives. + +Pax allows an extended record to have length x-1 or x if x is a power of +ten; @samp{99<97_bytes>} or @samp{100<97_bytes>}. Tarlz minimizes the length +of the record and always produces a length of x-1 in these cases. + +@sp 1 +@section No data in hard links + +Tarlz does not allow data in hard link members. The data (if any) must be in +the member determining the type of the file (which can't be a link). If all +the names of a file are stored as hard links, the type of the file is lost. +Not allowing data in hard links also prevents invalid actions like +extracting file data for a hard link to a symbolic link or to a directory. + +@sp 1 +@section Avoid misconversions to/from UTF-8 + +There is no portable way to tell what charset a text string is coded into. 
+Therefore, tarlz stores all fields representing text strings unmodified, +without conversion to UTF-8 nor any other transformation. This prevents +accidental double UTF-8 conversions. If the need arises this behavior will +be adjusted with a command line option in the future. + + +@node Program design +@chapter Internal structure of tarlz +@cindex program design + +The parts of tarlz related to sequential processing of the archive are more +or less similar to any other tar and won't be described here. The interesting +parts described here are those related to multi-threaded processing. + +The structure of the part of tarlz performing multi-threaded archive +creation is somewhat similar to that of +@uref{http://www.nongnu.org/lzip/plzip.html#Program-design,,plzip} with the +added complication of the solidity levels. +@ifnothtml +@xref{Program design,,,plzip}. +@end ifnothtml +A grouper thread and several worker threads are created, with the main +thread acting as muxer (multiplexer) thread. A "packet courier" takes care of data +transfers among threads and limits the maximum number of data blocks +(packets) being processed simultaneously. + +The grouper traverses the directory tree, groups together the metadata of +the files to be archived in each lzip member, and distributes them to the +workers. The workers compress the metadata received from the grouper along +with the file data read from the file system. The muxer collects processed +packets from the workers, and writes them to the archive. + +@verbatim +.--------. +| data|---> to each worker below +| | .------------. +| file | ,-->| worker 0 |--, +| system | | `------------' | +| | .---------. | .------------. | .-------. .---------. +|metadata|--->| grouper |-+-->| worker 1 |--+-->| muxer |-->| archive | +`--------' `---------' | `------------' | `-------' `---------' + | ... | + | .------------. 
| + `-->| worker N-1 |--' + `------------' +@end verbatim + +Decoding an archive is somewhat similar to how plzip decompresses a regular +file to standard output, with the differences that only messages, and not +the data, are written to stdout/stderr, and that each worker may +access files in the file system either to read them (diff) or write them +(extract). As in plzip, each worker reads members directly from the archive. + +@verbatim +.--------. +| file |<---> data to/from each worker below +| system | +`--------' .------------. + ,-->| worker 0 |--, + | `------------' | +.---------. | .------------. | .-------. .--------. +| archive |-+-->| worker 1 |--+-->| muxer |-->| stdout | +`---------' | `------------' | `-------' | stderr | + | ... | `--------' + | .------------. | + `-->| worker N-1 |--' + `------------' +@end verbatim + +As misaligned tar.lz archives can't be decoded in parallel, and the +misalignment can't be detected until after decoding has started, a +"mastership request" mechanism has been designed that allows the decoding to +continue instead of signalling an error. + +During parallel decoding, if a worker finds a misalignment, it requests +mastership to decode the rest of the archive. When mastership is requested, +an error_member_id is set, and all subsequently received packets with +member_id > error_member_id are rejected. All workers requesting mastership +are blocked at the request_mastership call until mastership is granted. +Mastership is granted to the delivering worker when its queue is empty to +make sure that all preceding packets have been processed. When mastership is +granted, all packets are deleted and all subsequently received packets not +coming from the master are rejected. + +If a worker can't continue decoding for any cause (for example lack of +memory or finding a split tar member at the beginning of a lzip member), it +requests mastership to print an error and terminate the program. 
Only if +some other worker requests mastership in a previous lzip member can this +error be avoided. + + +@node Multi-threaded decoding +@chapter Limitations of parallel tar decoding +@cindex parallel tar decoding + +Safely decoding an arbitrary tar archive in parallel is only possible if one +decodes the headers sequentially first. For example, if a tar archive +containing another tar archive is decoded starting from some position other +than the beginning, there is no way to know if the first header found there +belongs to the outer tar archive or to the inner tar archive. Tar is a +format inherently serial; it was designed for tapes. + +The pax format is even more serial than the ustar format. Two headers need +to be decoded sequentially for each file. The extended header may even need +parsing to reveal something as basic as file size. If a thread decodes the +ustar header skipping the preceding extended header, it may extract a file +of incorrect size at the wrong place. Moreover, a pax archive with global +headers can't be decoded in parallel because each thread can't know about +the global headers decoded by other threads. + +In the case of compressed tar archives, the start of each compressed block +determines one point through which the tar archive can be decoded in +parallel. Therefore, in tar.lz archives the decoding operations can't be +parallelized if the tar members are not aligned with the lzip members. Tar +archives compressed with plzip can't be decoded in parallel because tar and +plzip do not have a way to align both sets of members. Certainly one can +decompress one such archive with a multi-threaded tool like plzip, but the +increase in speed is not as large as it could be because plzip must +serialize the decompressed data and pass them to tar, which decodes them +sequentially, one tar member at a time. 
+ +On the other hand, if the tar.lz archive is created with a tool like tarlz, +which can guarantee the alignment between tar members and lzip members +because it controls both archiving and compression, then the lzip format +becomes an indexed layer on top of the tar archive which makes possible +decoding it safely in parallel. + +Tarlz is able to automatically decode aligned and unaligned multimember +tar.lz archives, keeping backwards compatibility. If tarlz finds a member +misalignment during multi-threaded decoding, it switches to single-threaded +mode and continues decoding the archive. + +If the files in the archive are large, multi-threaded @samp{--list} on a +regular (seekable) tar.lz archive can be hundreds of times faster than +sequential @samp{--list} because, in addition to using several processors, +it only needs to decompress part of each lzip member. See the following +example listing the Silesia corpus on a dual core machine: + +@example +tarlz -9 --no-solid -cf silesia.tar.lz silesia +time lzip -cd silesia.tar.lz | tar -tf - (5.032s) +time plzip -cd silesia.tar.lz | tar -tf - (3.256s) +time tarlz -tf silesia.tar.lz (0.020s) +@end example + +On the other hand, multi-threaded @samp{--list} won't detect corruption in +the tar member data because it only decodes the part of each lzip member +corresponding to the tar member header. This is another reason why the tar +headers must provide their own integrity checking. + +@sp 1 +@anchor{mt-extraction} +@section Limitations of multi-threaded extraction + +Multi-threaded extraction may produce different output than single-threaded +extraction in some cases: + +During multi-threaded extraction, several independent threads are +simultaneously reading the archive and creating files in the file system. +The archive is not read sequentially. 
As a consequence, any error or +weirdness in the archive (like a corrupt member or an end-of-archive block +in the middle of the archive) won't be usually detected until part of the +archive beyond that point has been processed. + +If the archive contains two or more tar members with the same name, +single-threaded extraction extracts the members in the order they appear in +the archive and leaves in the file system the last version of the file. But +multi-threaded extraction may extract the members in any order and leave in +the file system any version of the file nondeterministically. It is +unspecified which of the tar members is extracted. + +If the same file is extracted through several paths (different member names +resolve to the same file in the file system), the result is undefined. +(Probably the resulting file will be mangled). + +Extraction of a hard link may fail if it is extracted before the file it +links to. + + +@node Minimum archive sizes +@chapter Minimum archive sizes required for multi-threaded block compression +@cindex minimum archive sizes + +When creating or appending to a compressed archive using multi-threaded +block compression, tarlz puts tar members together in blocks and compresses +as many blocks simultaneously as worker threads are chosen, creating a +multimember compressed archive. + +For this to work as expected (and roughly multiply the compression speed by +the number of available processors), the uncompressed archive must be at +least as large as the number of worker threads times the block size +(@pxref{--data-size}). Else some processors will not get any data to +compress, and compression will be proportionally slower. The maximum speed +increase achievable on a given archive is limited by the ratio +@w{(uncompressed_size / data_size)}. For example, a tarball the size of gcc +or linux will scale up to 10 or 14 processors at level -9. 
+ +The following table shows the minimum uncompressed archive size needed for +full use of N processors at a given compression level, using the default +data size for each level: + +@multitable {Processors} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB} +@headitem Processors @tab 2 @tab 4 @tab 8 @tab 16 @tab 64 @tab 256 +@item Level +@item -0 @tab 2 MiB @tab 4 MiB @tab 8 MiB @tab 16 MiB @tab 64 MiB @tab 256 MiB +@item -1 @tab 4 MiB @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 128 MiB @tab 512 MiB +@item -2 @tab 6 MiB @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 192 MiB @tab 768 MiB +@item -3 @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 256 MiB @tab 1 GiB +@item -4 @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 96 MiB @tab 384 MiB @tab 1.5 GiB +@item -5 @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 512 MiB @tab 2 GiB +@item -6 @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 1 GiB @tab 4 GiB +@item -7 @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 2 GiB @tab 8 GiB +@item -8 @tab 96 MiB @tab 192 MiB @tab 384 MiB @tab 768 MiB @tab 3 GiB @tab 12 GiB +@item -9 @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 1 GiB @tab 4 GiB @tab 16 GiB +@end multitable + + +@node Examples +@chapter A small tutorial with examples +@cindex examples + +@noindent +Example 1: Create a multimember compressed archive @samp{archive.tar.lz} +containing files @samp{a}, @samp{b} and @samp{c}. + +@example +tarlz -cf archive.tar.lz a b c +@end example + +@sp 1 +@noindent +Example 2: Append files @samp{d} and @samp{e} to the multimember compressed +archive @samp{archive.tar.lz}. + +@example +tarlz -rf archive.tar.lz d e +@end example + +@sp 1 +@noindent +Example 3: Create a solidly compressed appendable archive +@samp{archive.tar.lz} containing files @samp{a}, @samp{b} and @samp{c}. +Then append files @samp{d} and @samp{e} to the archive. 
+ +@example +tarlz --asolid -cf archive.tar.lz a b c +tarlz --asolid -rf archive.tar.lz d e +@end example + +@sp 1 +@noindent +Example 4: Create a compressed appendable archive containing directories +@samp{dir1}, @samp{dir2} and @samp{dir3} with a separate lzip member per +directory. Then append files @samp{a}, @samp{b}, @samp{c}, @samp{d} and +@samp{e} to the archive, all of them contained in a single lzip member. +The resulting archive @samp{archive.tar.lz} contains 5 lzip members +(including the end-of-archive member). + +@example +tarlz --dsolid -cf archive.tar.lz dir1 dir2 dir3 +tarlz --asolid -rf archive.tar.lz a b c d e +@end example + +@sp 1 +@noindent +Example 5: Create a solidly compressed archive @samp{archive.tar.lz} +containing files @samp{a}, @samp{b} and @samp{c}. Note that no more +files can be later appended to the archive. + +@example +tarlz --solid -cf archive.tar.lz a b c +@end example + +@sp 1 +@noindent +Example 6: Extract all files from archive @samp{archive.tar.lz}. + +@example +tarlz -xf archive.tar.lz +@end example + +@sp 1 +@noindent +Example 7: Extract files @samp{a} and @samp{c}, and the whole tree under +directory @samp{dir1} from archive @samp{archive.tar.lz}. + +@example +tarlz -xf archive.tar.lz a c dir1 +@end example + +@sp 1 +@noindent +Example 8: Copy the contents of directory @samp{sourcedir} to the directory +@samp{destdir}. + +@example +tarlz -C sourcedir --uncompressed -cf - . | tarlz -C destdir -xf - +@end example + +@sp 1 +@noindent +Example 9: Compress the existing POSIX archive @samp{archive.tar} and write +the output to @samp{archive.tar.lz}. Compress each member individually for +maximum availability. (If one member in the compressed archive gets damaged, +the other members can still be extracted). + +@example +tarlz -z --no-solid archive.tar +@end example + +@sp 1 +@noindent +Example 10: Compress the archive @samp{archive.tar} and write the output to +@samp{foo.tar.lz}. 
+ +@example +tarlz -z -o foo.tar.lz archive.tar +@end example + +@sp 1 +@noindent +Example 11: Concatenate and compress two archives @samp{archive1.tar} and +@samp{archive2.tar}, and write the output to @samp{foo.tar.lz}. + +@example +tarlz -A archive1.tar archive2.tar | tarlz -z -o foo.tar.lz +@end example + + +@node Problems +@chapter Reporting bugs +@cindex bugs +@cindex getting help + +There are probably bugs in tarlz. There are certainly errors and +omissions in this manual. If you report them, they will get fixed. If +you don't, no one will ever know about them and they will remain unfixed +for all eternity, if not longer. + +If you find a bug in tarlz, please send electronic mail to +@email{lzip-bug@@nongnu.org}. Include the version number, which you can +find by running @w{@samp{tarlz --version}} and +@w{@samp{tarlz -v --check-lib}}. + + +@node Concept index +@unnumbered Concept index + +@printindex cp + +@bye diff --git a/exclude.cc b/exclude.cc new file mode 100644 index 0000000..99491ca --- /dev/null +++ b/exclude.cc @@ -0,0 +1,54 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
namespace Exclude {

std::vector< std::string > patterns;		// patterns added so far

// Append 'arg' to the list of patterns tested by excluded().
void add_pattern( const std::string & arg )
  { patterns.push_back( arg ); }


/* Return true if 'filename' matches any of the stored patterns.
   The patterns are tried against the whole name and then against the
   remainder of the name after each leading component is removed.
   A trailing sequence starting with '/' in the name is ignored, so a
   pattern matching a directory also matches everything below it. */
bool excluded( const char * const filename )
  {
  if( patterns.empty() ) return false;
  typedef std::vector< std::string >::const_iterator Iter;
  const char * tail = filename;
  while( true )
    {
    for( Iter it = patterns.begin(); it != patterns.end(); ++it )
      {
#ifdef FNM_LEADING_DIR
      const bool hit = fnmatch( it->c_str(), tail, FNM_LEADING_DIR ) == 0;
#else
      // emulate FNM_LEADING_DIR with a second match against "pattern/*"
      const bool hit = fnmatch( it->c_str(), tail, 0 ) == 0 ||
                       fnmatch( ( *it + "/*" ).c_str(), tail, 0 ) == 0;
#endif
      if( hit ) return true;
      }
    while( *tail != 0 && *tail != '/' ) ++tail;	// step over one component
    while( *tail == '/' ) ++tail;		// step over the slashes after it
    if( *tail == 0 ) break;			// nothing left to test
    }
  return false;
  }

} // end namespace Exclude
/* Parse an unsigned decimal number from the first 'size' bytes of 'ptr',
   skipping any leading whitespace.
   On success, return the value and set '*tailp' (if 'tailp' is not null)
   to the first byte after the last digit parsed.
   On failure (no digit found within 'size' bytes, or value larger than
   LLONG_MAX), return 0 and set '*tailp' to 'ptr'. */
unsigned long long parse_decimal( const char * const ptr,
                                  const char ** const tailp,
                                  const unsigned long long size )
  {
  const unsigned long long limit = LLONG_MAX;
  unsigned long long result = 0;
  unsigned long long i = 0;
  while( i < size && std::isspace( (unsigned char)ptr[i] ) ) ++i;
  // test the bound first; ptr[size] does not belong to the caller's buffer
  if( i >= size || !std::isdigit( (unsigned char)ptr[i] ) )
    { if( tailp ) *tailp = ptr; return 0; }
  for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i )
    {
    const unsigned digit = ptr[i] - '0';
    /* Check before multiplying. A product that wraps around 2^64 can be
       larger than the previous value and still not exceed LLONG_MAX, so
       comparing after the fact does not detect every overflow. */
    if( result > ( limit - digit ) / 10 )		// overflow
      { if( tailp ) *tailp = ptr; return 0; }
    result = result * 10 + digit;
    }
  if( tailp ) *tailp = ptr + i;
  return result;
  }
--size >= 0 ) { buf[size] = num % 10 + '0'; num /= 10; } } + +unsigned long long print_size_keyword( char * const buf, + const unsigned long long size, const char * keyword ) + { + // "size keyword=value\n" + unsigned long long pos = decimal_digits( size ); + print_decimal( buf, pos, size ); buf[pos++] = ' '; + while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '='; + return pos; + } + +bool print_record( char * const buf, const unsigned long long size, + const char * keyword, const std::string & value ) + { + unsigned long long pos = print_size_keyword( buf, size, keyword ); + std::memcpy( buf + pos, value.c_str(), value.size() ); + pos += value.size(); buf[pos++] = '\n'; + return pos == size; + } + +bool print_record( char * const buf, const int size, + const char * keyword, const unsigned long long value ) + { + int pos = print_size_keyword( buf, size, keyword ); + const int vd = decimal_digits( value ); + print_decimal( buf + pos, vd, value ); pos += vd; buf[pos++] = '\n'; + return pos == size; + } + +bool print_record( char * const buf, const int size, + const char * keyword, const Etime & value ) + { + int pos = print_size_keyword( buf, size, keyword ); + pos += value.print( buf + pos ); buf[pos++] = '\n'; + return pos == size; + } + +} // end namespace + + +unsigned Etime::decimal_size() const + { + unsigned size = 1 + ( sec_ < 0 ); // first digit + negative sign + for( long long n = sec_; n >= 10 || n <= -10; n /= 10 ) ++size; + if( nsec_ > 0 && nsec_ <= 999999999 ) + { size += 2; // decimal point + first fractional digit + for( int n = nsec_; n >= 10; n /= 10 ) ++size; } + return size; + } + +unsigned Etime::print( char * const buf ) const + { + int len = 0; + if( nsec_ > 0 && nsec_ <= 999999999 ) + { for( int n = nsec_; n > 0; n /= 10 ) buf[len++] = n % 10 + '0'; + buf[len++] = '.'; } + long long n = sec_; + do { long long on = n; n /= 10; buf[len++] = llabs( on - 10 * n ) + '0'; } + while( n != 0 ); + if( sec_ < 0 ) buf[len++] = '-'; + 
for( int i = 0; i < len / 2; ++i ) std::swap( buf[i], buf[len-i-1] ); + return len; + } + +bool Etime::parse( const char * const ptr, const char ** const tailp, + const long long size ) + { + char * tail; + errno = 0; + long long s = strtoll( ptr, &tail, 10 ); + if( tail == ptr || errno || + ( *tail != 0 && *tail != '\n' && *tail != '.' ) ) return false; + int ns = 0; + if( *tail == '.' ) // parse nanoseconds and any extra digits + { + ++tail; + if( tail - ptr >= size || !std::isdigit( (unsigned char)*tail ) ) + return false; + for( int factor = 100000000; + tail - ptr < size && std::isdigit( (unsigned char)*tail ); + ++tail, factor /= 10 ) + ns += factor * ( *tail - '0' ); + } + sec_ = s; nsec_ = ns; if( tailp ) *tailp = tail; + return true; + } + + +std::vector< std::string > Extended::unknown_keywords; +const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" ); + +void Extended::calculate_sizes() const + { + linkpath_recsize_ = linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0; + path_recsize_ = path_.size() ? record_size( 4, path_.size() ) : 0; + file_size_recsize_ = + ( file_size_ > 0 ) ? record_size( 4, decimal_digits( file_size_ ) ) : 0; + uid_recsize_ = ( uid_ >= 0 ) ? record_size( 3, decimal_digits( uid_ ) ) : 0; + gid_recsize_ = ( gid_ >= 0 ) ? record_size( 3, decimal_digits( gid_ ) ) : 0; + atime_recsize_ = + atime_.out_of_ustar_range() ? record_size( 5, atime_.decimal_size() ) : 0; + mtime_recsize_ = + mtime_.out_of_ustar_range() ? 
record_size( 5, mtime_.decimal_size() ) : 0; + edsize_ = linkpath_recsize_ + path_recsize_ + file_size_recsize_ + + uid_recsize_ + gid_recsize_ + atime_recsize_ + mtime_recsize_ + + crc_record.size(); + padded_edsize_ = round_up( edsize_ ); + full_size_ = header_size + padded_edsize_; + } + + +// print a diagnostic for each unknown keyword once per keyword +void Extended::unknown_keyword( const char * const buf, + const unsigned long long size ) const + { + unsigned long long eq_pos = 0; // position of '=' in buf + while( eq_pos < size && buf[eq_pos] != '=' ) ++eq_pos; + const std::string keyword( buf, eq_pos ); + for( unsigned i = 0; i < unknown_keywords.size(); ++i ) + if( keyword == unknown_keywords[i] ) return; + unknown_keywords.push_back( keyword ); + print_error( 0, "Ignoring unknown extended header keyword '%s'", + keyword.c_str() ); + } + + +// Return the size of the extended block, -1 if error, -2 if out of memory. +long long Extended::format_block( Resizable_buffer & rbuf ) const + { + if( empty() ) return 0; // no extended data + const unsigned long long bufsize = full_size(); // recalculate sizes + if( edsize_ <= 0 ) return 0; // no extended data + if( edsize_ >= 1LL << 33 ) return -1; // too much extended data + if( !rbuf.resize( bufsize ) ) return -2; // extended block buffer + uint8_t * const header = rbuf.u8(); // extended header + char * const buf = rbuf() + header_size; // extended records + init_tar_header( header ); + header[typeflag_o] = tf_extended; // fill only required fields + print_octal( header + size_o, size_l - 1, edsize_ ); + print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) ); + + if( path_recsize_ && !print_record( buf, path_recsize_, "path", path_ ) ) + return -1; + long long pos = path_recsize_; + if( linkpath_recsize_ && + !print_record( buf + pos, linkpath_recsize_, "linkpath", linkpath_ ) ) + return -1; + pos += linkpath_recsize_; + if( file_size_recsize_ && + !print_record( buf + pos, file_size_recsize_, 
"size", file_size_ ) ) + return -1; + pos += file_size_recsize_; + if( uid_recsize_ && !print_record( buf + pos, uid_recsize_, "uid", uid_ ) ) + return -1; + pos += uid_recsize_; + if( gid_recsize_ && !print_record( buf + pos, gid_recsize_, "gid", gid_ ) ) + return -1; + pos += gid_recsize_; + if( atime_recsize_ && + !print_record( buf + pos, atime_recsize_, "atime", atime_ ) ) + return -1; + pos += atime_recsize_; + if( mtime_recsize_ && + !print_record( buf + pos, mtime_recsize_, "mtime", mtime_ ) ) + return -1; + pos += mtime_recsize_; + const unsigned crc_size = Extended::crc_record.size(); + std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size ); + pos += crc_size; + if( pos != edsize_ ) return -1; + print_hex( buf + edsize_ - 9, 8, + crc32c.windowed_crc( (const uint8_t *)buf, edsize_ - 9, edsize_ ) ); + if( padded_edsize_ > edsize_ ) // set padding to zero + std::memset( buf + edsize_, 0, padded_edsize_ - edsize_ ); + crc_present_ = true; + return bufsize; + } + + +bool Extended::parse( const char * const buf, const unsigned long long edsize, + const bool permissive ) + { + reset(); full_size_ = -1; // invalidate cached sizes + for( unsigned long long pos = 0; pos < edsize; ) // parse records + { + const char * tail; + const unsigned long long rsize = + parse_decimal( buf + pos, &tail, edsize - pos ); + if( rsize == 0 || rsize > edsize - pos || + tail[0] != ' ' || buf[pos+rsize-1] != '\n' ) return false; + ++tail; // point to keyword + // rest = length of (keyword + '=' + value) without the final newline + const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail; + if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 ) + { + if( path_.size() && !permissive ) return false; + unsigned long long len = rest - 5; + while( len > 1 && tail[5+len-1] == '/' ) --len; // trailing '/' + path_.assign( tail + 5, len ); + // this also truncates path_ at the first embedded null character + path_.assign( remove_leading_dotslash( path_.c_str(), 
&removed_prefix ) ); + } + else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 ) + { + if( linkpath_.size() && !permissive ) return false; + unsigned long long len = rest - 9; + while( len > 1 && tail[9+len-1] == '/' ) --len; // trailing '/' + linkpath_.assign( tail + 9, len ); + } + else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 ) + { + if( file_size_ != 0 && !permissive ) return false; + file_size_ = parse_decimal( tail + 5, &tail, rest - 5 ); + // parse error or size fits in ustar header + if( file_size_ < 1LL << 33 || file_size_ > max_file_size || + tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 4 && std::memcmp( tail, "uid=", 4 ) == 0 ) + { + if( uid_ >= 0 && !permissive ) return false; + uid_ = parse_decimal( tail + 4, &tail, rest - 4 ); + // parse error or uid fits in ustar header + if( uid_ < 1 << 21 || tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 4 && std::memcmp( tail, "gid=", 4 ) == 0 ) + { + if( gid_ >= 0 && !permissive ) return false; + gid_ = parse_decimal( tail + 4, &tail, rest - 4 ); + // parse error or gid fits in ustar header + if( gid_ < 1 << 21 || tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 6 && std::memcmp( tail, "atime=", 6 ) == 0 ) + { + if( atime_.isvalid() && !permissive ) return false; + if( !atime_.parse( tail + 6, &tail, rest - 6 ) || // parse error + tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 6 && std::memcmp( tail, "mtime=", 6 ) == 0 ) + { + if( mtime_.isvalid() && !permissive ) return false; + if( !mtime_.parse( tail + 6, &tail, rest - 6 ) || // parse error + tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 ) + { + if( crc_present_ && !permissive ) return false; + if( rsize != crc_record.size() ) return false; + crc_present_ = true; + const uint32_t stored_crc = parse_record_crc( tail + 10 ); + const uint32_t computed_crc = + 
crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize ); + if( stored_crc != computed_crc ) + { + if( verbosity >= 2 ) + std::fprintf( stderr, "CRC32C = %08X\n", (unsigned)computed_crc ); + return false; + } + } + else if( ( rest < 8 || std::memcmp( tail, "comment=", 8 ) != 0 ) && + verbosity >= 1 ) unknown_keyword( tail, rest ); + pos += rsize; + } + return true; + } + + +/* If not already initialized, copy linkpath, path, file_size, uid, gid, + atime, and mtime from ustar header. */ +void Extended::fill_from_ustar( const Tar_header header ) + { + if( linkpath_.empty() ) // copy linkpath from ustar header + { + int len = 0; + while( len < linkname_l && header[linkname_o+len] ) ++len; + while( len > 1 && header[linkname_o+len-1] == '/' ) --len; // trailing '/' + if( len > 0 ) + { + linkpath_.assign( (const char *)header + linkname_o, len ); + full_size_ = -1; + } + } + + if( path_.empty() ) // copy path from ustar header + { // the entire path may be in prefix + char stored_name[prefix_l+1+name_l+1]; + int len = 0; + while( len < prefix_l && header[prefix_o+len] ) + { stored_name[len] = header[prefix_o+len]; ++len; } + if( len && header[name_o] ) stored_name[len++] = '/'; + for( int i = 0; i < name_l && header[name_o+i]; ++i ) + { stored_name[len] = header[name_o+i]; ++len; } + while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/' + stored_name[len] = 0; + path( remove_leading_dotslash( stored_name, &removed_prefix ) ); + } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( file_size_ == 0 && // copy file_size from ustar header + ( typeflag == tf_regular || typeflag == tf_hiperf ) ) + file_size( parse_octal( header + size_o, size_l ) ); + if( uid_ < 0 ) uid_ = parse_octal( header + uid_o, uid_l ); + if( gid_ < 0 ) gid_ = parse_octal( header + gid_o, gid_l ); + if( !atime_.isvalid() ) + atime_.set( parse_octal( header + mtime_o, mtime_l ) ); // 33 bits + if( !mtime_.isvalid() ) + mtime_.set( parse_octal( header + mtime_o, 
mtime_l ) ); // 33 bits + } + + +/* Return file size from record or from ustar header, and reset file_size_. + Used for fast parsing of headers in uncompressed archives. +*/ +long long Extended::get_file_size_and_reset( const Tar_header header ) + { + const long long tmp = file_size_; + file_size( 0 ); // reset full_size_ + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_regular || typeflag == tf_hiperf ) + { + if( tmp == 0 ) return parse_octal( header + size_o, size_l ); + else return tmp; + } + return 0; + } diff --git a/lzip_index.cc b/lzip_index.cc new file mode 100644 index 0000000..b886d2b --- /dev/null +++ b/lzip_index.cc @@ -0,0 +1,221 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
namespace {

/* Return the diagnostic for a lzip member whose format version is not
   supported. The message is built in a static buffer that is overwritten
   by the next call. */
const char * bad_version( const unsigned version )
  {
  enum { msg_size = 80 };
  static char msg[msg_size];
  snprintf( msg, msg_size, "Version %u member format not supported.",
            version );
  return msg;
  }

} // end namespace
+bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos, + const bool ignore_trailing, + const bool loose_trailing ) + { + if( pos < min_member_size ) return false; + enum { block_size = 16384, + buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size }; + uint8_t buffer[buffer_size]; + int bsize = pos % block_size; // total bytes in buffer + if( bsize <= buffer_size - block_size ) bsize += block_size; + int search_size = bsize; // bytes to search for trailer + int rd_size = bsize; // bytes to read from file + unsigned long long ipos = pos - rd_size; // aligned to block_size + + while( true ) + { + if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) + { set_errno_error( "Error seeking member trailer: " ); return false; } + const uint8_t max_msb = ( ipos + search_size ) >> 56; + for( int i = search_size; i >= Lzip_trailer::size; --i ) + if( buffer[i-1] <= max_msb ) // most significant byte of member_size + { + const Lzip_trailer & trailer = + *(const Lzip_trailer *)( buffer + i - Lzip_trailer::size ); + const unsigned long long member_size = trailer.member_size(); + if( member_size == 0 ) // skip trailing zeros + { while( i > Lzip_trailer::size && buffer[i-9] == 0 ) --i; continue; } + if( member_size > ipos + i || !trailer.verify_consistency() ) + continue; + Lzip_header header; + if( !read_header( fd, header, ipos + i - member_size ) ) return false; + if( !header.verify() ) continue; + const Lzip_header & header2 = *(const Lzip_header *)( buffer + i ); + const bool full_h2 = bsize - i >= Lzip_header::size; + if( header2.verify_prefix( bsize - i ) ) // last member + { + if( !full_h2 ) error_ = "Last member in input file is truncated."; + else if( !check_header_error( header2, false ) ) + error_ = "Last member in input file is truncated or corrupt."; + retval_ = 2; return false; + } + if( !loose_trailing && full_h2 && header2.verify_corrupt() ) + { error_ = corrupt_mm_msg; retval_ = 2; return false; } + if( !ignore_trailing ) + 
{ error_ = trailing_msg; retval_ = 2; return false; } + pos = ipos + i - member_size; + const unsigned dictionary_size = header.dictionary_size(); + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, dictionary_size ) ); + if( dictionary_size_ < dictionary_size ) + dictionary_size_ = dictionary_size; + return true; + } + if( ipos == 0 ) + { set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size ); + return false; } + bsize = buffer_size; + search_size = bsize - Lzip_header::size; + rd_size = block_size; + ipos -= rd_size; + std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); + } + } + + +Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, + const bool loose_trailing ) + : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), dictionary_size_( 0 ), + bad_magic_( false ) + { + if( insize < 0 ) + { set_errno_error( "Input file is not seekable: " ); return; } + if( insize < min_member_size ) + { error_ = "Input file is too short."; retval_ = 2; return; } + if( insize > INT64_MAX ) + { error_ = "Input file is too long (2^63 bytes or more)."; + retval_ = 2; return; } + + Lzip_header header; + if( !read_header( infd, header, 0 ) ) return; + if( check_header_error( header, true ) ) return; + + unsigned long long pos = insize; // always points to a header or to EOF + while( pos >= min_member_size ) + { + Lzip_trailer trailer; + if( seek_read( infd, trailer.data, Lzip_trailer::size, + pos - Lzip_trailer::size ) != Lzip_trailer::size ) + { set_errno_error( "Error reading member trailer: " ); break; } + const unsigned long long member_size = trailer.member_size(); + if( member_size > pos || !trailer.verify_consistency() ) // bad trailer + { + if( member_vector.empty() ) + { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) ) + continue; else return; } + set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size ); + break; + } + if( !read_header( infd, header, pos - member_size ) ) break; + if( 
!header.verify() ) // bad header + { + if( member_vector.empty() ) + { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) ) + continue; else return; } + set_num_error( "Bad header at pos ", pos - member_size ); + break; + } + pos -= member_size; + const unsigned dictionary_size = header.dictionary_size(); + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, dictionary_size ) ); + if( dictionary_size_ < dictionary_size ) + dictionary_size_ = dictionary_size; + } + if( pos != 0 || member_vector.empty() ) + { + member_vector.clear(); + if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; } + return; + } + std::reverse( member_vector.begin(), member_vector.end() ); + for( unsigned long i = 0; ; ++i ) + { + const long long end = member_vector[i].dblock.end(); + if( end < 0 || end > INT64_MAX ) + { + member_vector.clear(); + error_ = "Data in input file is too long (2^63 bytes or more)."; + retval_ = 2; return; + } + if( i + 1 >= member_vector.size() ) break; + member_vector[i+1].dblock.pos( end ); + } + } diff --git a/lzip_index.h b/lzip_index.h new file mode 100644 index 0000000..af8aaa4 --- /dev/null +++ b/lzip_index.h @@ -0,0 +1,93 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef INT64_MAX +#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL +#endif + + +class Block + { + long long pos_, size_; // pos + size <= INT64_MAX + +public: + Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} + + long long pos() const { return pos_; } + long long size() const { return size_; } + long long end() const { return pos_ + size_; } + + void pos( const long long p ) { pos_ = p; } + void size( const long long s ) { size_ = s; } + }; + + +class Lzip_index + { + struct Member + { + Block dblock, mblock; // data block, member block + unsigned dictionary_size; + + Member( const long long dp, const long long ds, + const long long mp, const long long ms, const unsigned dict_size ) + : dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {} + }; + + std::vector< Member > member_vector; + std::string error_; + const long long insize; + int retval_; + unsigned dictionary_size_; // largest dictionary size in the file + bool bad_magic_; // bad magic in first header + + bool check_header_error( const Lzip_header & header, const bool first ); + void set_errno_error( const char * const msg ); + void set_num_error( const char * const msg, unsigned long long num ); + bool read_header( const int fd, Lzip_header & header, const long long pos ); + bool skip_trailing_data( const int fd, unsigned long long & pos, + const bool ignore_trailing, const bool loose_trailing ); + +public: + Lzip_index( const int infd, const bool ignore_trailing, + const bool loose_trailing ); + + long members() const { return member_vector.size(); } + const std::string & error() const { return error_; } + int retval() const { return retval_; } + unsigned dictionary_size() const { return dictionary_size_; } + bool bad_magic() const { return bad_magic_; } + + long long udata_size() const + { if( member_vector.empty() ) return 0; + return member_vector.back().dblock.end(); } + + long long cdata_size() const + { if( member_vector.empty() ) return 0; + return 
member_vector.back().mblock.end(); } + + // total size including trailing data (if any) + long long file_size() const + { if( insize >= 0 ) return insize; else return 0; } + + const Block & dblock( const long i ) const + { return member_vector[i].dblock; } + const Block & mblock( const long i ) const + { return member_vector[i].mblock; } + unsigned dictionary_size( const long i ) const + { return member_vector[i].dictionary_size; } + }; @@ -0,0 +1,723 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +/* + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, files differ, invalid command line options, I/O errors, + etc), 2 to indicate a corrupt or invalid input file, 3 for an internal + consistency error (e.g., bug) which caused tarlz to panic. 
+*/ + +#define _FILE_OFFSET_BITS 64 + +#include <cctype> +#include <cerrno> +#include <cstdarg> +#include <cstdio> +#include <cstdlib> +#include <ctime> +#include <fcntl.h> +#include <pthread.h> +#include <stdint.h> // for lzlib.h +#include <unistd.h> +#include <sys/stat.h> +#include <grp.h> +#include <pwd.h> +#include <lzlib.h> +#if defined __OS2__ +#include <io.h> +#endif + +#include "tarlz.h" +#include "arg_parser.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#if CHAR_BIT != 8 +#error "Environments where CHAR_BIT != 8 are not supported." +#endif + +int verbosity = 0; +const char * const program_name = "tarlz"; + +namespace { + +const char * const program_year = "2022"; +const char * invocation_name = program_name; // default value + + +void show_help( const long num_online ) + { + std::printf( "Tarlz is a massively parallel (multi-threaded) combined implementation of\n" + "the tar archiver and the lzip compressor. Tarlz uses the compression library\n" + "lzlib.\n" + "\nTarlz creates, lists, and extracts archives in a simplified and safer\n" + "variant of the POSIX pax format compressed in lzip format, keeping the\n" + "alignment between tar members and lzip members. The resulting multimember\n" + "tar.lz archive is fully backward compatible with standard tar tools like GNU\n" + "tar, which treat it like any other tar.lz archive. Tarlz can append files to\n" + "the end of such compressed archives.\n" + "\nKeeping the alignment between tar members and lzip members has two\n" + "advantages. It adds an indexed lzip layer on top of the tar archive, making\n" + "it possible to decode the archive safely in parallel. It also minimizes the\n" + "amount of data lost in case of corruption.\n" + "\nThe tarlz file format is a safe POSIX-style backup format. In case of\n" + "corruption, tarlz can extract all the undamaged members from the tar.lz\n" + "archive, skipping over the damaged members, just like the standard\n" + "(uncompressed) tar. 
Moreover, the option '--keep-damaged' can be used to\n" + "recover as much data as possible from each damaged member, and lziprecover\n" + "can be used to recover some of the damaged members.\n" + "\nUsage: %s operation [options] [files]\n", invocation_name ); + std::printf( "\nOperations:\n" + " --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -A, --concatenate append archives to the end of an archive\n" + " -c, --create create a new archive\n" + " -d, --diff find differences between archive and file system\n" + " --delete delete files/directories from an archive\n" + " -r, --append append files to the end of an archive\n" + " -t, --list list the contents of an archive\n" + " -x, --extract extract files/directories from an archive\n" + " -z, --compress compress existing POSIX tar archives\n" + " --check-lib check version of lzlib and exit\n" + "\nOptions:\n" + " -B, --data-size=<bytes> set target size of input data blocks [2x8=16 MiB]\n" + " -C, --directory=<dir> change to directory <dir>\n" + " -f, --file=<archive> use archive file <archive>\n" + " -h, --dereference follow symlinks; archive the files they point to\n" + " -n, --threads=<n> set number of (de)compression threads [%ld]\n" + " -o, --output=<file> compress to <file>\n" + " -p, --preserve-permissions don't subtract the umask on extraction\n" + " -q, --quiet suppress all messages\n" + " -v, --verbose verbosely list files processed\n" + " -0 .. 
-9 set compression level [default 6]\n" + " --uncompressed don't compress the archive created\n" + " --asolid create solidly compressed appendable archive\n" + " --bsolid create per block compressed archive (default)\n" + " --dsolid create per directory compressed archive\n" + " --no-solid create per file compressed archive\n" + " --solid create solidly compressed archive\n" + " --anonymous equivalent to '--owner=root --group=root'\n" + " --owner=<owner> use <owner> name/ID for files added to archive\n" + " --group=<group> use <group> name/ID for files added to archive\n" + " --exclude=<pattern> exclude files matching a shell pattern\n" + " --ignore-ids ignore differences in owner and group IDs\n" + " --ignore-overflow ignore mtime overflow differences on 32-bit\n" + " --keep-damaged don't delete partially extracted files\n" + " --missing-crc exit with error status if missing extended CRC\n" + " --mtime=<date> use <date> as mtime for files added to archive\n" + " --out-slots=<n> number of 1 MiB output packets buffered [64]\n" + " --warn-newer warn if any file is newer than the archive\n" +/* " --permissive allow repeated extended headers and records\n"*/, + num_online ); + if( verbosity >= 1 ) + { + std::printf( " --debug=<level> (0-1) print debug statistics to stderr\n" ); + } + std::printf( "\nIf no archive is specified, tarlz tries to read it from standard input or\n" + "write it to standard output.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems\n" + "(file not found, files differ, invalid command line options, I/O errors,\n" + "etc), 2 to indicate a corrupt or invalid input file, 3 for an internal\n" + "consistency error (e.g., bug) which caused tarlz to panic.\n" + "\nReport bugs to lzip-bug@nongnu.org\n" + "Tarlz home page: http://www.nongnu.org/lzip/tarlz.html\n" ); + } + + +void show_version() + { + std::printf( "%s %s\n", program_name, PROGVERSION ); + std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + 
std::printf( "Using lzlib %s\n", LZ_version() ); + std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" ); + } + + +int check_lzlib_ver() // <major>.<minor> or <major>.<minor>[a-z.-]* + { +#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 + const unsigned char * p = (unsigned char *)LZ_version_string; + unsigned major = 0, minor = 0; + while( major < 100000 && isdigit( *p ) ) + { major *= 10; major += *p - '0'; ++p; } + if( *p == '.' ) ++p; + else +out: { show_error( "Invalid LZ_version_string in lzlib.h" ); return 2; } + while( minor < 100 && isdigit( *p ) ) + { minor *= 10; minor += *p - '0'; ++p; } + if( *p && *p != '-' && *p != '.' && !std::islower( *p ) ) goto out; + const unsigned version = major * 1000 + minor; + if( LZ_API_VERSION != version ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Version mismatch in lzlib.h: " + "LZ_API_VERSION = %u, should be %u.\n", + program_name, LZ_API_VERSION, version ); + return 2; + } +#endif + return 0; + } + + +int check_lib() + { + int retval = check_lzlib_ver(); + if( std::strcmp( LZ_version_string, LZ_version() ) != 0 ) + { set_retval( retval, 1 ); + if( verbosity >= 0 ) + std::printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n", + LZ_version_string, LZ_version() ); } +#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 + if( LZ_API_VERSION != LZ_api_version() ) + { set_retval( retval, 1 ); + if( verbosity >= 0 ) + std::printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n", + LZ_API_VERSION, LZ_api_version() ); } +#endif + if( verbosity >= 1 ) + { + std::printf( "Using lzlib %s\n", LZ_version() ); +#if !defined LZ_API_VERSION + std::fputs( "LZ_API_VERSION is not defined.\n", stdout ); +#elif LZ_API_VERSION >= 1012 + std::printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() ); +#else + std::printf( 
"Compiled with LZ_API_VERSION = %u. " + "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION ); +#endif + } + return retval; + } + + +// separate numbers of 6 or more digits in groups of 3 digits using '_' +const char * format_num3( long long num ) + { + const char * const si_prefix = "kMGTPEZY"; + const char * const binary_prefix = "KMGTPEZY"; + enum { buffers = 8, bufsize = 4 * sizeof num }; + static char buffer[buffers][bufsize]; // circle of static buffers for printf + static int current = 0; + + char * const buf = buffer[current++]; current %= buffers; + char * p = buf + bufsize - 1; // fill the buffer backwards + *p = 0; // terminator + const bool negative = num < 0; + char prefix = 0; // try binary first, then si + for( int i = 0; i < 8 && num != 0 && ( num / 1024 ) * 1024 == num; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( int i = 0; i < 8 && num != 0 && ( num / 1000 ) * 1000 == num; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + const bool split = num >= 100000 || num <= -100000; + + for( int i = 0; ; ) + { + long long onum = num; num /= 10; + *(--p) = llabs( onum - 10 * num ) + '0'; if( num == 0 ) break; + if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } + } + if( negative ) *(--p) = '-'; + return p; + } + + +void show_option_error( const char * const arg, const char * const msg, + const char * const option_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: '%s': %s option '%s'.\n", + program_name, arg, msg, option_name ); + } + + +long long getnum( const char * const arg, const char * const option_name, + const long long llimit = LLONG_MIN, + const long long ulimit = LLONG_MAX ) + { + char * tail; + errno = 0; + long long result = strtoll( arg, &tail, 0 ); + if( tail == arg ) + { show_option_error( arg, "Bad or missing numerical argument in", + option_name ); std::exit( 1 ); } + + if( !errno && tail[0] ) + { + const int factor = ( tail[1] == 'i' ) ? 
1024 : 1000; + int exponent = 0; // 0 = bad multiplier + switch( tail[0] ) + { + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; + } + if( exponent <= 0 ) + { show_option_error( arg, "Bad multiplier in numerical argument of", + option_name ); std::exit( 1 ); } + for( int i = 0; i < exponent; ++i ) + { + if( ( result >= 0 && LLONG_MAX / factor >= result ) || + ( result < 0 && LLONG_MIN / factor <= result ) ) result *= factor; + else { errno = ERANGE; break; } + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in " + "option '%s'.\n", program_name, arg, format_num3( llimit ), + format_num3( ulimit ), option_name ); + std::exit( 1 ); + } + return result; + } + + +void set_archive_name( std::string & archive_name, const std::string & new_name ) + { + static bool first_call = true; + + if( first_call ) { if( new_name != "-" ) archive_name = new_name; + first_call = false; return; } + show_error( "Only one archive can be specified.", 0, true ); + std::exit( 1 ); + } + + +void set_mode( Program_mode & program_mode, const Program_mode new_mode ) + { + if( program_mode != m_none && program_mode != new_mode ) + { + show_error( "Only one operation can be specified.", 0, true ); + std::exit( 1 ); + } + program_mode = new_mode; + } + + +// parse time as 'long long' even if time_t is 32-bit +long long parse_mtime( const char * arg, const char * const pn ) + { + if( *arg == '@' ) return getnum( arg + 1, pn ); + else if( *arg == '.' 
|| *arg == '/' ) + { + struct stat st; + if( stat( arg, &st ) == 0 ) return st.st_mtime; + show_file_error( arg, "Can't stat mtime reference file", errno ); + std::exit( 1 ); + } + else // format '[-]YYYY-MM-DD[[[<separator>HH]:MM]:SS]' + { + long long y; // long long because 2147483648-01-01 overflows int + unsigned mo, d, h, m, s; + char sep; + const int n = std::sscanf( arg, "%lld-%u-%u%c%u:%u:%u", + &y, &mo, &d, &sep, &h, &m, &s ); + if( n >= 3 && n <= 7 && n != 4 && ( n == 3 || sep == ' ' || sep == 'T' ) ) + { + if( y >= INT_MIN + 1900 && y <= INT_MAX && mo >= 1 && mo <= 12 ) + { + struct tm t; + t.tm_year = y - 1900; t.tm_mon = mo - 1; t.tm_mday = d; + t.tm_hour = ( n >= 5 ) ? h : 0; t.tm_min = ( n >= 6 ) ? m : 0; + t.tm_sec = ( n >= 7 ) ? s : 0; t.tm_isdst = -1; + errno = 0; + const long long mtime = std::mktime( &t ); + if( mtime != -1 || errno == 0 ) return mtime; // valid datetime + } + show_option_error( arg, "Date out of limits in", pn ); std::exit( 1 ); + } + } + show_option_error( arg, "Unknown date format in", pn ); std::exit( 1 ); + } + + +long long parse_owner( const char * const arg, const char * const pn ) + { + const struct passwd * const pw = getpwnam( arg ); + if( pw ) return pw->pw_uid; + if( std::isdigit( (unsigned char)arg[0] ) ) + return getnum( arg, pn, 0, LLONG_MAX ); + if( std::strcmp( arg, "root" ) == 0 ) return 0; + show_option_error( arg, "Invalid owner in", pn ); std::exit( 1 ); + } + +long long parse_group( const char * const arg, const char * const pn ) + { + const struct group * const gr = getgrnam( arg ); + if( gr ) return gr->gr_gid; + if( std::isdigit( (unsigned char)arg[0] ) ) + return getnum( arg, pn, 0, LLONG_MAX ); + if( std::strcmp( arg, "root" ) == 0 ) return 0; + show_option_error( arg, "Invalid group in", pn ); std::exit( 1 ); + } + +} // end namespace + + +int hstat( const char * const filename, struct stat * const st, + const bool dereference ) + { return dereference ? 
stat( filename, st ) : lstat( filename, st ); } + + +int open_instream( const std::string & name ) + { + const int infd = open( name.c_str(), O_RDONLY | O_BINARY ); + if( infd < 0 ) + { show_file_error( name.c_str(), "Can't open for reading", errno ); + return -1; } + struct stat st; // infd must not be a directory + if( fstat( infd, &st ) == 0 && S_ISDIR( st.st_mode ) ) + { show_file_error( name.c_str(), "Can't read. Is a directory." ); + close( infd ); return -1; } + return infd; + } + + +int open_outstream( const std::string & name, const bool create, + Resizable_buffer * const rbufp, const bool force ) + { + const int cflags = O_CREAT | O_WRONLY | ( force ? O_TRUNC : O_EXCL ); + const int flags = ( create ? cflags : O_RDWR ) | O_BINARY; + const mode_t outfd_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; + + const int outfd = open( name.c_str(), flags, outfd_mode ); + if( outfd < 0 ) + { + const char * msg = !create ? "Error opening file" : + ( ( errno == EEXIST ) ? "Skipping file" : "Can't create file" ); + if( !rbufp ) show_file_error( name.c_str(), msg, errno ); + else format_file_error( *rbufp, name.c_str(), msg, errno ); + } + return outfd; + } + + +/* This can be called from any thread, main thread or sub-threads alike, + since they all call common helper functions that call exit_fail_mt() + in case of an error. +*/ +void exit_fail_mt( const int retval ) + { + // calling 'exit' more than once results in undefined behavior + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + + pthread_mutex_lock( &mutex ); // ignore errors to avoid loop + std::exit( retval ); + } + + +void show_error( const char * const msg, const int errcode, const bool help ) + { + if( verbosity < 0 ) return; + if( msg && msg[0] ) + std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? 
std::strerror( errcode ) : "" ); + if( help ) + std::fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); + } + + +bool format_error( Resizable_buffer & rbuf, const int errcode, + const char * const format, ... ) + { + if( verbosity < 0 ) { rbuf.resize( 1 ); rbuf()[0] = 0; return false; } + va_list args; + for( int i = 0; i < 2; ++i ) // resize rbuf if not large enough + { + int len = snprintf( rbuf(), rbuf.size(), "%s: ", program_name ); + if( len >= (int)rbuf.size() && !rbuf.resize( len + 1 ) ) break; + va_start( args, format ); + len += vsnprintf( rbuf() + len, rbuf.size() - len, format, args ); + va_end( args ); + if( len >= (int)rbuf.size() && !rbuf.resize( len + 1 ) ) break; + if( errcode <= 0 ) rbuf()[len++] = '\n'; + else len += snprintf( rbuf() + len, rbuf.size() - len, ": %s\n", + std::strerror( errcode ) ); + if( len < (int)rbuf.size() || !rbuf.resize( len + 1 ) ) break; + } + return true; + } + + +void print_error( const int errcode, const char * const format, ... 
) + { + if( verbosity < 0 ) return; + va_list args; + std::fprintf( stderr, "%s: ", program_name ); + va_start( args, format ); + std::vfprintf( stderr, format, args ); + va_end( args ); + if( errcode <= 0 ) std::fputc( '\n', stderr ); + else std::fprintf( stderr, ": %s\n", std::strerror( errcode ) ); + } + + +void format_file_error( std::string & estr, const char * const filename, + const char * const msg, const int errcode ) + { + if( verbosity < 0 ) return; + estr += program_name; estr += ": "; estr += filename; estr += ": "; + estr += msg; + if( errcode > 0 ) { estr += ": "; estr += std::strerror( errcode ); } + estr += '\n'; + } + +bool format_file_error( Resizable_buffer & rbuf, const char * const filename, + const char * const msg, const int errcode ) + { + if( verbosity < 0 ) { rbuf.resize( 1 ); rbuf()[0] = 0; return false; } + for( int i = 0; i < 2; ++i ) // resize rbuf if not large enough + { + const int len = snprintf( rbuf(), rbuf.size(), "%s: %s: %s%s%s\n", + program_name, filename, msg, ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? std::strerror( errcode ) : "" ); + if( len < (int)rbuf.size() || !rbuf.resize( len + 1 ) ) break; + } + return true; + } + +void show_file_error( const char * const filename, const char * const msg, + const int errcode ) + { + if( verbosity >= 0 && msg && msg[0] ) + std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? 
std::strerror( errcode ) : "" ); + } + + +void internal_error( const char * const msg ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); + std::exit( 3 ); + } + + +int main( const int argc, const char * const argv[] ) + { + if( argc > 0 ) invocation_name = argv[0]; + + enum { opt_ano = 256, opt_aso, opt_bso, opt_chk, opt_crc, opt_dbg, opt_del, + opt_dso, opt_exc, opt_grp, opt_hlp, opt_id, opt_kd, opt_mti, opt_nso, + opt_ofl, opt_out, opt_own, opt_per, opt_sol, opt_un, opt_wn }; + const Arg_parser::Option options[] = + { + { '0', 0, Arg_parser::no }, + { '1', 0, Arg_parser::no }, + { '2', 0, Arg_parser::no }, + { '3', 0, Arg_parser::no }, + { '4', 0, Arg_parser::no }, + { '5', 0, Arg_parser::no }, + { '6', 0, Arg_parser::no }, + { '7', 0, Arg_parser::no }, + { '8', 0, Arg_parser::no }, + { '9', 0, Arg_parser::no }, + { 'A', "concatenate", Arg_parser::no }, + { 'B', "data-size", Arg_parser::yes }, + { 'c', "create", Arg_parser::no }, + { 'C', "directory", Arg_parser::yes }, + { 'd', "diff", Arg_parser::no }, + { 'f', "file", Arg_parser::yes }, + { 'h', "dereference", Arg_parser::no }, + { 'H', "format", Arg_parser::yes }, + { 'n', "threads", Arg_parser::yes }, + { 'o', "output", Arg_parser::yes }, + { 'p', "preserve-permissions", Arg_parser::no }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "append", Arg_parser::no }, + { 't', "list", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { 'x', "extract", Arg_parser::no }, + { 'z', "compress", Arg_parser::no }, + { opt_ano, "anonymous", Arg_parser::no }, + { opt_aso, "asolid", Arg_parser::no }, + { opt_bso, "bsolid", Arg_parser::no }, + { opt_chk, "check-lib", Arg_parser::no }, + { opt_dbg, "debug", Arg_parser::yes }, + { opt_del, "delete", Arg_parser::no }, + { opt_dso, "dsolid", Arg_parser::no }, + { opt_exc, "exclude", Arg_parser::yes }, + { opt_grp, "group", Arg_parser::yes }, + { opt_hlp, "help", Arg_parser::no }, + { 
opt_id, "ignore-ids", Arg_parser::no }, + { opt_kd, "keep-damaged", Arg_parser::no }, + { opt_crc, "missing-crc", Arg_parser::no }, + { opt_mti, "mtime", Arg_parser::yes }, + { opt_nso, "no-solid", Arg_parser::no }, + { opt_ofl, "ignore-overflow", Arg_parser::no }, + { opt_out, "out-slots", Arg_parser::yes }, + { opt_own, "owner", Arg_parser::yes }, + { opt_per, "permissive", Arg_parser::no }, + { opt_sol, "solid", Arg_parser::no }, + { opt_un, "uncompressed", Arg_parser::no }, + { opt_wn, "warn-newer", Arg_parser::no }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options, true ); // in_order + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + Cl_options cl_opts( parser ); + + const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) ); + long max_workers = sysconf( _SC_THREAD_THREADS_MAX ); + if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) ) + max_workers = INT_MAX / sizeof (pthread_t); + + const char * f_pn = 0; + const char * o_pn = 0; + const char * z_pn = 0; + for( int argind = 0; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) // skip non-options + { + if( parser.argument( argind ).empty() ) + { show_error( "Empty non-option argument." 
); return 1; } + if( parser.argument( argind ) != "-" ) cl_opts.filenames_given = true; + ++cl_opts.num_files; continue; + } + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & sarg = parser.argument( argind ); + const char * const arg = sarg.c_str(); + switch( code ) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + cl_opts.level = code - '0'; break; + case 'A': set_mode( cl_opts.program_mode, m_concatenate ); break; + case 'B': cl_opts.data_size = + getnum( arg, pn, min_data_size, max_data_size ); break; + case 'c': set_mode( cl_opts.program_mode, m_create ); break; + case 'C': break; // skip chdir + case 'd': set_mode( cl_opts.program_mode, m_diff ); break; + case 'f': set_archive_name( cl_opts.archive_name, sarg ); f_pn = pn; break; + case 'h': cl_opts.dereference = true; break; + case 'H': break; // ignore format + case 'n': cl_opts.num_workers = getnum( arg, pn, 0, max_workers ); break; + case 'o': cl_opts.output_filename = sarg; o_pn = pn; break; + case 'p': cl_opts.preserve_permissions = true; break; + case 'q': verbosity = -1; break; + case 'r': set_mode( cl_opts.program_mode, m_append ); break; + case 't': set_mode( cl_opts.program_mode, m_list ); break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + case 'x': set_mode( cl_opts.program_mode, m_extract ); break; + case 'z': set_mode( cl_opts.program_mode, m_compress ); z_pn = pn; break; + case opt_ano: cl_opts.uid = parse_owner( "root", pn ); + cl_opts.gid = parse_group( "root", pn ); break; + case opt_aso: cl_opts.solidity = asolid; break; + case opt_bso: cl_opts.solidity = bsolid; break; + case opt_crc: cl_opts.missing_crc = true; break; + case opt_chk: return check_lib(); + case opt_dbg: cl_opts.debug_level = getnum( arg, pn, 0, 3 ); break; + case opt_del: set_mode( cl_opts.program_mode, m_delete ); break; + case opt_dso: cl_opts.solidity = dsolid; break; + case 
opt_exc: Exclude::add_pattern( sarg ); break; + case opt_grp: cl_opts.gid = parse_group( arg, pn ); break; + case opt_hlp: show_help( num_online ); return 0; + case opt_id: cl_opts.ignore_ids = true; break; + case opt_kd: cl_opts.keep_damaged = true; break; + case opt_mti: cl_opts.mtime = parse_mtime( arg, pn ); + cl_opts.mtime_set = true; break; + case opt_nso: cl_opts.solidity = no_solid; break; + case opt_ofl: cl_opts.ignore_overflow = true; break; + case opt_out: cl_opts.out_slots = getnum( arg, pn, 1, 1024 ); break; + case opt_own: cl_opts.uid = parse_owner( arg, pn ); break; + case opt_per: cl_opts.permissive = true; break; + case opt_sol: cl_opts.solidity = solid; break; + case opt_un: cl_opts.level = -1; break; + case opt_wn: cl_opts.warn_newer = true; break; + default : internal_error( "uncaught option" ); + } + } // end process options + + if( cl_opts.program_mode != m_compress && o_pn ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Option '%s' can only be used with " + "'-z, --compress'.\n", program_name, o_pn ); + return 1; + } + if( cl_opts.program_mode == m_compress && f_pn ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Option '%s' can't be used with '%s'.\n", + program_name, f_pn, z_pn ); + return 1; + } + if( cl_opts.program_mode == m_compress && + ( cl_opts.level < 0 || cl_opts.level > 9 ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Option '--uncompressed' can't be used with '%s'.\n", + program_name, z_pn ); + return 1; + } + +#if !defined LZ_API_VERSION || LZ_API_VERSION < 1012 // compile-time test +#error "lzlib 1.12 or newer needed." +#endif + if( LZ_api_version() < 1012 ) // runtime test + { show_error( "Wrong library version. At least lzlib 1.12 is required." 
); + return 1; } + +#if defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + if( cl_opts.data_size <= 0 && cl_opts.level >= 0 && cl_opts.level <= 9 ) + { + if( cl_opts.level == 0 ) cl_opts.data_size = 1 << 20; + else cl_opts.data_size = 2 * option_mapping[cl_opts.level].dictionary_size; + } + if( cl_opts.num_workers < 0 ) // 0 disables multi-threading + cl_opts.num_workers = std::min( num_online, max_workers ); + + switch( cl_opts.program_mode ) + { + case m_none: show_error( "Missing operation.", 0, true ); return 1; + case m_append: + case m_create: return encode( cl_opts ); + case m_compress: return compress( cl_opts ); + case m_concatenate: return concatenate( cl_opts ); + case m_delete: return delete_members( cl_opts ); + case m_diff: + case m_extract: + case m_list: return decode( cl_opts ); + } + } @@ -0,0 +1,609 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include <climits> +#include <cstring> +#include <string> +#include <vector> +#include <stdint.h> +#include <sys/types.h> + +#define max_file_size ( LLONG_MAX - header_size ) +enum { header_size = 512 }; +typedef uint8_t Tar_header[header_size]; + +enum Offsets { + name_o = 0, mode_o = 100, uid_o = 108, gid_o = 116, size_o = 124, + mtime_o = 136, chksum_o = 148, typeflag_o = 156, linkname_o = 157, + magic_o = 257, version_o = 263, uname_o = 265, gname_o = 297, + devmajor_o = 329, devminor_o = 337, prefix_o = 345 }; + +enum Lengths { + name_l = 100, mode_l = 8, uid_l = 8, gid_l = 8, size_l = 12, + mtime_l = 12, chksum_l = 8, typeflag_l = 1, linkname_l = 100, + magic_l = 6, version_l = 2, uname_l = 32, gname_l = 32, + devmajor_l = 8, devminor_l = 8, prefix_l = 155 }; + +enum Typeflag { + tf_regular = '0', tf_link = '1', tf_symlink = '2', tf_chardev = '3', + tf_blockdev = '4', tf_directory = '5', tf_fifo = '6', tf_hiperf = '7', + tf_global = 'g', tf_extended = 'x' }; + +const uint8_t ustar_magic[magic_l] = + { 0x75, 0x73, 0x74, 0x61, 0x72, 0 }; // "ustar\0" + +inline bool verify_ustar_magic( const Tar_header header ) + { return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; } + +inline void init_tar_header( Tar_header header ) // set magic and version + { + std::memset( header, 0, header_size ); + std::memcpy( header + magic_o, ustar_magic, magic_l - 1 ); + header[version_o] = header[version_o+1] = '0'; + } + +inline void print_octal( uint8_t * const buf, int size, unsigned long long num ) + { while( --size >= 0 ) { buf[size] = num % 8 + '0'; num /= 8; } } + + +// Round "size" to the next multiple of header size (512). +// +inline unsigned long long round_up( const unsigned long long size ) + { + const int rem = size % header_size; + const int padding = rem ? 
header_size - rem : 0; + return size + padding; + } + + +inline int decimal_digits( unsigned long long value ) + { + int digits = 1; + while( value >= 10 ) { value /= 10; ++digits; } + return digits; + } + + +inline bool dotdot_at_i( const char * const filename, const int i ) + { + return ( filename[i] == '.' && filename[i+1] == '.' && + ( i == 0 || filename[i-1] == '/' ) && + ( filename[i+2] == 0 || filename[i+2] == '/' ) ); + } + + +inline bool contains_dotdot( const char * const filename ) + { + for( int i = 0; filename[i]; ++i ) + if( dotdot_at_i( filename, i ) ) return true; + return false; + } + + +class Resizable_buffer + { + char * p; + unsigned long size_; // size_ < LONG_MAX + +public: + // must be >= 87 for format_member_name + enum { default_initial_size = 2 * header_size }; + + explicit Resizable_buffer( const unsigned long initial_size = + default_initial_size ) + : p( (char *)std::malloc( initial_size ) ), size_( p ? initial_size : 0 ) {} + ~Resizable_buffer() { if( p ) std::free( p ); p = 0; size_ = 0; } + + bool resize( const unsigned long long new_size ) + { + if( new_size >= LONG_MAX ) return false; + if( size_ < new_size ) + { + char * const tmp = (char *)std::realloc( p, new_size ); + if( !tmp ) return false; + p = tmp; size_ = new_size; + } + return true; + } + char * operator()() { return p; } + const char * operator()() const { return p; } + uint8_t * u8() { return (uint8_t *)p; } + const uint8_t * u8() const { return (const uint8_t *)p; } + unsigned long size() const { return size_; } + }; + + +inline bool uid_in_ustar_range( const long long uid ) // also for gid + { return uid >= 0 && uid < 1 << 21; } + +inline bool time_in_ustar_range( const long long seconds ) + { return seconds >= 0 && seconds < 1LL << 33; } + + +/* The sign of the seconds field applies to the whole time value. + A nanoseconds value out of range means an invalid time. 
*/ +class Etime // time since (or before) the epoch + { + long long sec_; + int nsec_; // range [0, 999_999_999] + +public: + Etime() : sec_( 0 ), nsec_( -1 ) {} + void reset() { sec_ = 0; nsec_ = -1; } + void set( const long long s ) { sec_ = s; nsec_ = 0; } + long long sec() const { return sec_; } + int nsec() const { return nsec_; } + bool isvalid() const { return nsec_ >= 0 && nsec_ <= 999999999; } + bool out_of_ustar_range() const + { return isvalid() && !time_in_ustar_range( sec_ ); } + + unsigned decimal_size() const; + unsigned print( char * const buf ) const; + bool parse( const char * const ptr, const char ** const tailp, + const long long size ); + }; + + +class Extended // stores metadata from/for extended records + { + static std::vector< std::string > unknown_keywords; // already diagnosed + std::string linkpath_; // these are the real metadata + std::string path_; + long long file_size_; // >= 0 && <= max_file_size + long long uid_, gid_; // may not fit in unsigned int + Etime atime_, mtime_; + + // cached sizes; if full_size_ < 0 they must be recalculated + mutable long long edsize_; // extended data size + mutable long long padded_edsize_; // edsize rounded up + mutable long long full_size_; // header + padded edsize + mutable long long linkpath_recsize_; + mutable long long path_recsize_; + mutable int file_size_recsize_; + mutable int uid_recsize_; + mutable int gid_recsize_; + mutable int atime_recsize_; + mutable int mtime_recsize_; + + // true if CRC present in parsed or formatted records + mutable bool crc_present_; + + void calculate_sizes() const; + void unknown_keyword( const char * const buf, + const unsigned long long size ) const; + +public: + static const std::string crc_record; + std::string removed_prefix; + + Extended() + : file_size_( 0 ), uid_( -1 ), gid_( -1 ), edsize_( 0 ), + padded_edsize_( 0 ), full_size_( 0 ), linkpath_recsize_( 0 ), + path_recsize_( 0 ), file_size_recsize_( 0 ), uid_recsize_( 0 ), + gid_recsize_( 0 ), 
atime_recsize_( 0 ), mtime_recsize_( 0 ), + crc_present_( false ) {} + + void reset() + { linkpath_.clear(); path_.clear(); file_size_ = 0; uid_ = -1; gid_ = -1; + atime_.reset(); mtime_.reset(); edsize_ = 0; padded_edsize_ = 0; + full_size_ = 0; linkpath_recsize_ = 0; path_recsize_ = 0; + file_size_recsize_ = 0; uid_recsize_ = 0; gid_recsize_ = 0; + atime_recsize_ = 0; mtime_recsize_ = 0; crc_present_ = false; + removed_prefix.clear(); } + + bool empty() const + { return linkpath_.empty() && path_.empty() && file_size_ == 0 && + uid_ < 0 && gid_ < 0 && + !atime_.out_of_ustar_range() && !mtime_.out_of_ustar_range(); } + + const std::string & linkpath() const { return linkpath_; } + const std::string & path() const { return path_; } + long long file_size() const { return file_size_; } + long long get_file_size_and_reset( const Tar_header header ); + long long get_uid() const { return uid_; } + long long get_gid() const { return gid_; } + const Etime & atime() const { return atime_; } + const Etime & mtime() const { return mtime_; } + + void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -1; } + void path( const char * const p ) { path_ = p; full_size_ = -1; } + void file_size( const long long fs ) { full_size_ = -1; + file_size_ = ( fs >= 0 && fs <= max_file_size ) ? 
fs : 0; } + bool set_uid( const long long id ) + { if( id >= 0 ) { uid_ = id; full_size_ = -1; } return id >= 0; } + bool set_gid( const long long id ) + { if( id >= 0 ) { gid_ = id; full_size_ = -1; } return id >= 0; } + void set_atime( const long long s ) { atime_.set( s ); full_size_ = -1; } + void set_mtime( const long long s ) { mtime_.set( s ); full_size_ = -1; } + + long long full_size() const + { if( full_size_ < 0 ) calculate_sizes(); return full_size_; } + + bool crc_present() const { return crc_present_; } + long long format_block( Resizable_buffer & rbuf ) const; + bool parse( const char * const buf, const unsigned long long edsize, + const bool permissive ); + void fill_from_ustar( const Tar_header header ); + }; + + +class CRC32 + { + uint32_t data[256]; // Table of CRCs of all 8-bit messages. + +public: + CRC32( const bool castagnoli = false ) + { + const unsigned cpol = 0x82F63B78U; // CRC32-C Castagnoli polynomial. + const unsigned ipol = 0xEDB88320U; // IEEE 802.3 Ethernet polynomial. + const unsigned poly = castagnoli ? 
cpol : ipol; + + for( unsigned n = 0; n < 256; ++n ) + { + unsigned c = n; + for( int k = 0; k < 8; ++k ) + { if( c & 1 ) c = poly ^ ( c >> 1 ); else c >>= 1; } + data[n] = c; + } + } + + void update_byte( uint32_t & crc, const uint8_t byte ) const + { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); } + + // about as fast as it is possible without messing with endianness + void update_buf( uint32_t & crc, const uint8_t * const buffer, + const int size ) const + { + uint32_t c = crc; + for( int i = 0; i < size; ++i ) + c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 ); + crc = c; + } + + uint32_t compute_crc( const uint8_t * const buffer, const int size ) const + { + uint32_t crc = 0xFFFFFFFFU; + for( int i = 0; i < size; ++i ) + crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); + return crc ^ 0xFFFFFFFFU; + } + + // Calculates the crc of size bytes except a window of 8 bytes at pos + uint32_t windowed_crc( const uint8_t * const buffer, const int pos, + const int size ) const + { + uint32_t crc = 0xFFFFFFFFU; + update_buf( crc, buffer, pos ); + update_buf( crc, buffer + pos + 8, size - pos - 8 ); + return crc ^ 0xFFFFFFFFU; + } + }; + + +struct Lzma_options + { + int dictionary_size; // 4 KiB .. 512 MiB + int match_len_limit; // 5 .. 
273 + }; +const Lzma_options option_mapping[] = + { + { 65535, 16 }, // -0 + { 1 << 20, 5 }, // -1 + { 3 << 19, 6 }, // -2 + { 1 << 21, 8 }, // -3 + { 3 << 20, 12 }, // -4 + { 1 << 22, 20 }, // -5 + { 1 << 23, 36 }, // -6 + { 1 << 24, 68 }, // -7 + { 3 << 23, 132 }, // -8 + { 1 << 25, 273 } }; // -9 + + +enum { + min_dictionary_bits = 12, + min_dictionary_size = 1 << min_dictionary_bits, + max_dictionary_bits = 29, + max_dictionary_size = 1 << max_dictionary_bits, + min_member_size = 36, + min_data_size = 2 * min_dictionary_size, + max_data_size = 2 * max_dictionary_size }; + + +inline bool isvalid_ds( const unsigned dictionary_size ) + { return ( dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ); } + + +const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP" + +struct Lzip_header + { + enum { size = 6 }; + uint8_t data[size]; // 0-3 magic bytes + // 4 version + // 5 coded dictionary size + + bool verify_magic() const + { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); } + + bool verify_prefix( const int sz ) const // detect (truncated) header + { + for( int i = 0; i < sz && i < 4; ++i ) + if( data[i] != lzip_magic[i] ) return false; + return ( sz > 0 ); + } + + bool verify_corrupt() const // detect corrupt header + { + int matches = 0; + for( int i = 0; i < 4; ++i ) + if( data[i] == lzip_magic[i] ) ++matches; + return ( matches > 1 && matches < 4 ); + } + + uint8_t version() const { return data[4]; } + bool verify_version() const { return ( data[4] == 1 ); } + + unsigned dictionary_size() const + { + unsigned sz = ( 1 << ( data[5] & 0x1F ) ); + if( sz > min_dictionary_size ) + sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); + return sz; + } + + bool verify() const + { return verify_magic() && verify_version() && + isvalid_ds( dictionary_size() ); } + }; + + +struct Lzip_trailer + { + enum { size = 20 }; + uint8_t data[size]; // 0-3 CRC32 of the uncompressed data + // 4-11 size of the uncompressed data + // 12-19 
member size including header and trailer + + unsigned data_crc() const + { + unsigned tmp = 0; + for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + + unsigned long long data_size() const + { + unsigned long long tmp = 0; + for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + + unsigned long long member_size() const + { + unsigned long long tmp = 0; + for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + + bool verify_consistency() const // check internal consistency + { + const unsigned crc = data_crc(); + const unsigned long long dsize = data_size(); + if( ( crc == 0 ) != ( dsize == 0 ) ) return false; + const unsigned long long msize = member_size(); + if( msize < min_member_size ) return false; + const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size; + if( mlimit > dsize && msize > mlimit ) return false; + const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1; + if( dlimit > msize && dsize > dlimit ) return false; + return true; + } + }; + + +enum Program_mode { m_none, m_append, m_compress, m_concatenate, m_create, + m_delete, m_diff, m_extract, m_list }; +enum Solidity { no_solid, bsolid, dsolid, asolid, solid }; +class Arg_parser; + +struct Cl_options // command line options + { + const Arg_parser & parser; + std::string archive_name; + std::string output_filename; + long long mtime; + long long uid; + long long gid; + Program_mode program_mode; + Solidity solidity; + int data_size; + int debug_level; + int level; // compression level, < 0 means uncompressed + int num_files; + int num_workers; // start this many worker threads + int out_slots; + bool dereference; + bool filenames_given; + bool ignore_ids; + bool ignore_overflow; + bool keep_damaged; + bool missing_crc; + bool mtime_set; + bool permissive; + bool preserve_permissions; + bool warn_newer; + + Cl_options( const Arg_parser & ap ) + : parser( ap ), mtime( 0 ), uid( -1 ), gid( -1 ), 
program_mode( m_none ), + solidity( bsolid ), data_size( 0 ), debug_level( 0 ), level( 6 ), + num_files( 0 ), num_workers( -1 ), out_slots( 64 ), dereference( false ), + filenames_given( false ), ignore_ids( false ), ignore_overflow( false ), + keep_damaged( false ), missing_crc( false ), mtime_set( false ), + permissive( false ), preserve_permissions( false ), warn_newer( false ) {} + + bool to_stdout() const { return output_filename == "-"; } + }; + +inline void set_retval( int & retval, const int new_val ) + { if( retval < new_val ) retval = new_val; } + +const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; +const char * const bad_dict_msg = "Invalid dictionary size in member header."; +const char * const corrupt_mm_msg = "Corrupt header in multimember file."; +const char * const trailing_msg = "Trailing data not allowed."; +const char * const bad_hdr_msg = "Corrupt or invalid tar header."; +const char * const gblrec_msg = "Error in global extended records."; +const char * const extrec_msg = "Error in extended records."; +const char * const miscrc_msg = "Missing CRC in extended records."; +const char * const misrec_msg = "Missing extended records."; +const char * const longrec_msg = "Extended records are too long."; +const char * const end_msg = "Archive ends unexpectedly."; +const char * const mem_msg = "Not enough memory."; +const char * const mem_msg2 = "Not enough memory. 
Try a lower compression level."; +const char * const fv_msg1 = "Format violation: extended header followed by EOA blocks."; +const char * const fv_msg2 = "Format violation: extended header followed by global header."; +const char * const fv_msg3 = "Format violation: consecutive extended headers found."; +const char * const posix_msg = "This does not look like a POSIX tar archive."; +const char * const posix_lz_msg = "This does not look like a POSIX tar.lz archive."; +const char * const eclosa_msg = "Error closing archive"; +const char * const eclosf_msg = "Error closing file"; +const char * const nfound_msg = "Not found in archive."; +const char * const seek_msg = "Seek error"; +const char * const werr_msg = "Write error"; +const char * const chdir_msg = "Error changing working directory"; + +// defined in common.cc +void xinit_mutex( pthread_mutex_t * const mutex ); +void xinit_cond( pthread_cond_t * const cond ); +void xdestroy_mutex( pthread_mutex_t * const mutex ); +void xdestroy_cond( pthread_cond_t * const cond ); +void xlock( pthread_mutex_t * const mutex ); +void xunlock( pthread_mutex_t * const mutex ); +void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex ); +void xsignal( pthread_cond_t * const cond ); +void xbroadcast( pthread_cond_t * const cond ); +unsigned long long parse_octal( const uint8_t * const ptr, const int size ); +int readblock( const int fd, uint8_t * const buf, const int size ); +int writeblock( const int fd, const uint8_t * const buf, const int size ); +bool nonempty_arg( const Arg_parser & parser, const int i ); + +// defined in common_decode.cc +bool block_is_zero( const uint8_t * const buf, const int size ); +bool format_member_name( const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const bool long_format ); +bool show_member_name( const Extended & extended, const Tar_header header, + const int vlevel, Resizable_buffer & rbuf ); +bool check_skip_filename( const Cl_options & cl_opts, + 
std::vector< char > & name_pending, + const char * const filename ); +mode_t get_umask(); +bool make_path( const std::string & name ); + +// defined in compress.cc +int compress( const Cl_options & cl_opts ); + +// defined in create.cc +bool copy_file( const int infd, const int outfd, const long long max_size = -1 ); +bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, + const int size ); +bool write_eoa_records( const int outfd, const bool compressed ); +const char * remove_leading_dotslash( const char * const filename, + std::string * const removed_prefixp, const bool dotdot = false ); +bool print_removed_prefix( const std::string & prefix, + std::string * const msgp = 0 ); +bool fill_headers( const char * const filename, Extended & extended, + Tar_header header, long long & file_size, const int flag ); +bool block_is_full( const long long extended_size, + const unsigned long long file_size, + const unsigned long long target_size, + unsigned long long & partial_data_size ); +void set_error_status( const int retval ); +int final_exit_status( int retval, const bool show_msg = true ); +unsigned ustar_chksum( const Tar_header header ); +bool verify_ustar_chksum( const Tar_header header ); +bool has_lz_ext( const std::string & name ); +int concatenate( const Cl_options & cl_opts ); +int encode( const Cl_options & cl_opts ); + +// defined in create_lz.cc +int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, + const int outfd ); + +// defined in decode.cc +bool compare_file_type( std::string & estr, std::string & ostr, + const Cl_options & cl_opts, + const Extended & extended, const Tar_header header ); +class Archive_reader_base; +bool compare_file_contents( std::string & estr, std::string & ostr, + Archive_reader_base & ar, const long long file_size, + const char * const filename, const int infd2 ); +int decode( const Cl_options & cl_opts ); + +// defined in decode_lz.cc +struct Archive_descriptor; +int decode_lz( const 
Cl_options & cl_opts, const Archive_descriptor & ad, + std::vector< char > & name_pending ); + +// defined in delete.cc +bool safe_seek( const int fd, const long long pos ); +int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, + std::vector< char > & name_pending, const long long istream_pos, + const int outfd, int retval ); +int delete_members( const Cl_options & cl_opts ); + +// defined in delete_lz.cc +int delete_members_lz( const Cl_options & cl_opts, + const Archive_descriptor & ad, + std::vector< char > & name_pending, const int outfd ); + +// defined in exclude.cc +namespace Exclude { +void add_pattern( const std::string & arg ); +void clear(); +bool excluded( const char * const filename ); +} // end namespace Exclude + +// defined in extended.cc +extern const CRC32 crc32c; + +// defined in lzip_index.cc +int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ); + +// defined in main.cc +extern int verbosity; +extern const char * const program_name; +struct stat; +int hstat( const char * const filename, struct stat * const st, + const bool dereference ); +int open_instream( const std::string & name ); +int open_outstream( const std::string & name, const bool create = true, + Resizable_buffer * const rbufp = 0, const bool force = true ); +void exit_fail_mt( const int retval = 1 ); // terminate the program +void show_error( const char * const msg, const int errcode = 0, + const bool help = false ); +bool format_error( Resizable_buffer & rbuf, const int errcode, + const char * const format, ... ); +void print_error( const int errcode, const char * const format, ... 
); +void format_file_error( std::string & estr, const char * const filename, + const char * const msg, const int errcode = 0 ); +bool format_file_error( Resizable_buffer & rbuf, const char * const filename, + const char * const msg, const int errcode = 0 ); +void show_file_error( const char * const filename, const char * const msg, + const int errcode = 0 ); +void internal_error( const char * const msg ); diff --git a/testsuite/check.sh b/testsuite/check.sh new file mode 100755 index 0000000..348e447 --- /dev/null +++ b/testsuite/check.sh @@ -0,0 +1,1417 @@ +#! /bin/sh +# check script for Tarlz - Archiver with multimember lzip compression +# Copyright (C) 2013-2022 Antonio Diaz Diaz. +# +# This script is free software: you have unlimited permission +# to copy, distribute, and modify it. + +LC_ALL=C +export LC_ALL +objdir=`pwd` +testdir=`cd "$1" ; pwd` +TARLZ="${objdir}"/tarlz +framework_failure() { echo "failure in testing framework" ; exit 1 ; } + +if [ ! -f "${TARLZ}" ] || [ ! -x "${TARLZ}" ] ; then + echo "${TARLZ}: cannot execute" + exit 1 +fi + +[ -e "${TARLZ}" ] 2> /dev/null || + { + echo "$0: a POSIX shell is required to run the tests" + echo "Try bash -c \"$0 $1 $2\"" + exit 1 + } + +if [ -d tmp ] ; then rm -rf tmp ; fi +mkdir tmp +cd "${objdir}"/tmp || framework_failure + +in="${testdir}"/test.txt +in_lz="${testdir}"/test.txt.lz +in_tar="${testdir}"/test.txt.tar +in_tar_lz="${testdir}"/test.txt.tar.lz +inbad1="${testdir}"/test_bad1.txt +inbad2="${testdir}"/test_bad2.txt +test3="${testdir}"/test3.tar +test3_lz="${testdir}"/test3.tar.lz +test3dir="${testdir}"/test3_dir.tar +test3dir_lz="${testdir}"/test3_dir.tar.lz +test3dot_lz="${testdir}"/test3_dot.tar.lz +t155="${testdir}"/t155.tar +t155_lz="${testdir}"/t155.tar.lz +tlzit1="${testdir}"/tlz_in_tar1.tar +tlzit2="${testdir}"/tlz_in_tar2.tar +bad1="${testdir}"/test3_bad1.tar +bad2="${testdir}"/test3_bad2.tar +bad3="${testdir}"/test3_bad3.tar +bad4="${testdir}"/test3_bad4.tar +bad5="${testdir}"/test3_bad5.tar 
+bad1_lz="${testdir}"/test3_bad1.tar.lz +bad2_lz="${testdir}"/test3_bad2.tar.lz +bad3_lz="${testdir}"/test3_bad3.tar.lz +bad4_lz="${testdir}"/test3_bad4.tar.lz +bad5_lz="${testdir}"/test3_bad5.tar.lz +bad6_lz="${testdir}"/test3_bad6.tar.lz +eoa="${testdir}"/eoa_blocks.tar +eoa_lz="${testdir}"/eoa_blocks.tar.lz +fail=0 +lwarnc=0 +test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } +cyg_symlink() { [ ${lwarnc} = 0 ] && + printf "\nwarning: your OS follows symbolic links to directories even when tarlz asks it not to\n$1" + lwarnc=1 ; } + +# Description of test files for tarlz: +# test.txt.tar.lz: 1 member (test.txt). +# t155.tar[.lz]: directory + 3 links + file + EOA, all with 155 char names +# t155_fv?.tar[.lz]: like t155.tar but with 3 kinds of format violations +# t155_fv1.tar[.lz]: extra extended header before EOA blocks +# t155_fv2.tar[.lz]: first extended header followed by global header +# t155_fv3.tar[.lz]: consecutive extended headers in last member +# t155_fv[456].tar.lz: like t155_fv[123].tar.lz but violation alone in member +# tar_in_tlz1.tar.lz: 2 members (test.txt.tar test3.tar) 3 lzip members +# tar_in_tlz2.tar.lz: 2 members (test.txt.tar test3.tar) 5 lzip members +# ts_in_link.tar.lz: 4 symbolic links (link[1-4]) to / /dir/ dir/ dir(107/) +# test_bad1.txt.tar.lz: truncated at offset 6000 (of 7495) +# test_bad2.txt.tar.lz: byte at offset 6000 changed from 0x56 to 0x46 +# test3.tar[.lz]: 3 members (foo bar baz) + 2 zeroed 512-byte blocks +# test3_dir.tar[.lz] like test3.tar but members /dir/foo /dir/bar /dir/baz +# test3_dot.tar.lz: 3 times 3 members ./foo ././bar ./././baz +# the 3 central members with filename in extended header +# test3_bad1.tar: byte at offset 259 changed from 't' to '0' (magic) +# test3_bad2.tar: byte at offset 1283 changed from 't' to '0' (magic) +# test3_bad3.tar: byte at offset 2559 changed from 0x00 to 0x20 (padding) +# test3_bad4.tar: byte at offset 1283 changed from 't' to '0' (magic) +# byte at offset 
2307 changed from 't' to '0' (magic) +# test3_bad5.tar: 510 zeros + "LZ" prepended to test3.tar (bogus lz header) +# test3_bad1.tar.lz: byte at offset 2 changed from 'I' to 'i' (magic) +# test3_bad2.tar.lz: byte at offset 49 changed from 0x49 to 0x69 (mid stream) +# test3_bad3.tar.lz: byte at offset 176 changed from 0x7D to 0x6D (mid stream) +# test3_bad4.tar.lz: combined damage of test3_bad2.tar.lz and test3_bad3.tar.lz +# test3_bad5.tar.lz: [71-134] --> zeroed (first trailer + second header) +# test3_bad6.tar.lz: 510 zeros prepended to test3.tar.lz (header in two blocks) +# test3_eoa?.tar: like test3_eoa?.tar.lz but uncompressed +# test3_eoa1.tar.lz: test3.tar.lz without EOA blocks +# test3_eoa2.tar.lz: test3.tar.lz with only one EOA block +# test3_eoa3.tar.lz: test3.tar.lz with one zeroed block between foo and bar +# test3_eoa4.tar.lz: test3.tar.lz ended by extended header without EOA blocks +# test3_eoa5.tar.lz: test3.tar.lz split extended bar member, without EOA blocks +# test3_em?.tar.lz: test3.tar.lz with one empty lzip member at each position +# test3_em6.tar.lz: test3.tar.lz preceded by four empty lzip members +# test3_gh?.tar: test3.tar with global header at each position +# test3_gh?.tar.lz: test3.tar.lz with global before bar split in 4 ways +# test3_gh5.tar.lz: test3.tar.lz with global in lzip member before foo +# test3_gh6.tar.lz: test3.tar.lz with global before foo in same member +# test3_nn.tar[.lz]: test3.tar[.lz] with zeroed name (no name) in bar member +# test3_sm?.tar.lz: test3.tar.lz with extended bar member split in 4 ways +# tlz_in_tar1.tar: 1 member (test3.tar.lz) first magic damaged +# tlz_in_tar2.tar: 2 members (foo test3.tar.lz) first magic damaged +# ug32chars.tar.lz: 1 member (foo) with 32-character owner and group names +# ug32767.tar.lz: 1 member (foo) with numerical-only owner and group + +# Note that multi-threaded --list succeeds with test_bad2.txt.tar.lz and +# test3_bad3.tar.lz because their headers are intact. 
+ +"${TARLZ}" --check-lib # just print warning +[ $? != 2 ] || test_failed $LINENO # unless bad lzlib.h +printf "testing tarlz-%s..." "$2" + +"${TARLZ}" -q -tf "${in}" +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -q -tf "${in_lz}" +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -q -tf "${in_tar_lz}" -f "${in_tar_lz}" +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -tf nx_file +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -tf 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -cf out.tar.lz +[ $? = 1 ] || test_failed $LINENO +[ ! -e out.tar.lz ] || test_failed $LINENO +"${TARLZ}" -rf out.tar.lz || test_failed $LINENO +[ ! -e out.tar.lz ] || test_failed $LINENO +"${TARLZ}" -r || test_failed $LINENO +"${TARLZ}" --uncompressed -q -rf out.tar "${in}" +[ $? = 1 ] || test_failed $LINENO +[ ! -e out.tar ] || test_failed $LINENO +cat "${test3_lz}" > test.tar.lz || framework_failure +"${TARLZ}" --uncompressed -q -rf test.tar.lz "${in}" +[ $? = 2 ] || test_failed $LINENO +cmp "${test3_lz}" test.tar.lz || test_failed $LINENO +rm -f test.tar.lz || framework_failure +cat "${test3}" > test.tar || framework_failure +"${TARLZ}" -q -rf test.tar "${in}" +[ $? = 2 ] || test_failed $LINENO +cmp "${test3}" test.tar || test_failed $LINENO +rm -f test.tar || framework_failure +"${TARLZ}" -q -c "${in}" nx_file > /dev/null +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -c -C nx_dir "${in}" +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -x -C nx_dir "${test3_lz}" +[ $? = 1 ] || test_failed $LINENO +touch empty.tar.lz empty.tlz # list an empty lz file +"${TARLZ}" -q -tf empty.tar.lz +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -q -tf empty.tlz +[ $? = 2 ] || test_failed $LINENO +rm -f empty.tar.lz empty.tlz || framework_failure +"${TARLZ}" -q -cd # test mixed operations +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -cr +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -ct +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -cx +[ $? 
= 1 ] || test_failed $LINENO +"${TARLZ}" -q -tx +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -ctx +[ $? = 1 ] || test_failed $LINENO +for i in A c d r t x -delete ; do # test -o with operations other than -z + "${TARLZ}" -q -$i -o - + [ $? = 1 ] || test_failed $LINENO $i +done +"${TARLZ}" -q -z -f - +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -z . +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -z -o - --uncompressed "${test3}" > /dev/null 2>&1 +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -tf "${in_tar_lz}" "" # empty non-option argument +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" --help > /dev/null || test_failed $LINENO +"${TARLZ}" -V > /dev/null || test_failed $LINENO +"${TARLZ}" --bad_option -tf "${test3_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -tf 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +bad_dates='@-9223372036854775809 @9223372036854775808 + -2147481749-01-01T00:00:00 2147483648-01-01T00:00:00 + 2017-10-01T 2017-10 ./nx_file' +for i in ${bad_dates} ; do + "${TARLZ}" -c --mtime="$i" "${in}" > /dev/null 2>&1 + [ $? = 1 ] || test_failed $LINENO "$i" +done +"${TARLZ}" --owner=invalid_owner_name -tf "${test3_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" --group=invalid_group_name -tf "${test3_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO + +printf "\ntesting --list and --extract..." 
+ +# test --list and --extract +"${TARLZ}" -tf "${eoa_lz}" --missing-crc || test_failed $LINENO +"${TARLZ}" -xf "${eoa_lz}" --missing-crc || test_failed $LINENO +"${TARLZ}" -C nx_dir -tf "${in_tar}" > /dev/null || test_failed $LINENO +"${TARLZ}" -xf "${in_tar}" --missing-crc || test_failed $LINENO +cmp "${in}" test.txt || test_failed $LINENO +rm -f test.txt || framework_failure +"${TARLZ}" -tf "${in_tar_lz}" --missing-crc > /dev/null || test_failed $LINENO +for i in 0 2 6 ; do + "${TARLZ}" -n$i -xf "${in_tar_lz}" --missing-crc || test_failed $LINENO $i + cmp "${in}" test.txt || test_failed $LINENO $i + rm -f test.txt || framework_failure +done + +# test3 reference files for -t and -tv (list3, vlist3) +"${TARLZ}" -tf "${test3}" > list3 || test_failed $LINENO +"${TARLZ}" -tvf "${test3}" > vlist3 || test_failed $LINENO +for i in 0 2 6 ; do + "${TARLZ}" -n$i -tf "${test3_lz}" > out || test_failed $LINENO $i + diff -u list3 out || test_failed $LINENO $i + "${TARLZ}" -n$i -tvf "${test3_lz}" > out || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i +done +rm -f out || framework_failure + +# test3 reference files for cmp +cat "${testdir}"/rfoo > cfoo || framework_failure +cat "${testdir}"/rbar > cbar || framework_failure +cat "${testdir}"/rbaz > cbaz || framework_failure + +# test --list and --extract test3 +rm -f foo bar baz || framework_failure +"${TARLZ}" -xf "${test3}" --missing-crc || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +for i in 0 2 6 ; do + "${TARLZ}" -n$i -xf "${test3_lz}" --missing-crc || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -n$i -tvf "${test3_lz}" ./foo ./bar ./baz > out 2> /dev/null || + test_failed $LINENO $i + diff -u vlist3 out || test_failed 
$LINENO $i + rm -f out || framework_failure + "${TARLZ}" -q -n$i -xf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -n$i -xf "${test3_lz}" foo/ bar// baz/// || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dot_lz}" --missing-crc || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -tf "${test3dot_lz}" foo bar baz || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${test3dot_lz}" foo bar baz || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure +done + +for i in "${test3dir}" "${test3dir_lz}" ; do + "${TARLZ}" -q -tf "$i" --missing-crc || test_failed $LINENO "$i" + "${TARLZ}" -q -xf "$i" --missing-crc || test_failed $LINENO "$i" + cmp cfoo dir/foo || test_failed $LINENO "$i" + cmp cbar dir/bar || test_failed $LINENO "$i" + cmp cbaz dir/baz || test_failed $LINENO "$i" + rm -rf dir || framework_failure + "${TARLZ}" -q -tf "$i" dir || test_failed $LINENO "$i" + "${TARLZ}" -q -xf "$i" dir || test_failed $LINENO "$i" + cmp cfoo dir/foo || test_failed $LINENO "$i" + cmp cbar dir/bar || test_failed $LINENO "$i" + cmp cbaz dir/baz || test_failed $LINENO "$i" + rm -rf dir || framework_failure + "${TARLZ}" -q -tf "$i" dir/foo dir/baz || test_failed $LINENO "$i" + "${TARLZ}" -q -xf "$i" dir/foo dir/baz || test_failed $LINENO "$i" + cmp cfoo dir/foo || test_failed $LINENO "$i" + [ ! 
-e dir/bar ] || test_failed $LINENO "$i" + cmp cbaz dir/baz || test_failed $LINENO "$i" + rm -rf dir || framework_failure +done + +# test --extract --exclude +"${TARLZ}" -xf "${test3}" --exclude='f*o' --exclude=baz || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f foo bar baz || framework_failure +for i in 0 2 6 ; do + "${TARLZ}" -n$i -xf "${test3_lz}" --exclude=bar || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + [ ! -e bar ] || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='?ar' || test_failed $LINENO $i + cmp cfoo dir/foo || test_failed $LINENO $i + [ ! -e dir/bar ] || test_failed $LINENO $i + cmp cbaz dir/baz || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude=dir/bar || test_failed $LINENO $i + cmp cfoo dir/foo || test_failed $LINENO $i + [ ! -e dir/bar ] || test_failed $LINENO $i + cmp cbaz dir/baz || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude=dir || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='dir/*' || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='[bf][ao][orz]' || + test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='*o' dir/foo || + test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + rm -rf dir || framework_failure +done + +# test --list and --extract EOA +"${TARLZ}" -tvf "${testdir}"/test3_eoa1.tar > out 2> /dev/null +[ $? 
= 2 ] || test_failed $LINENO +diff -u vlist3 out || test_failed $LINENO +"${TARLZ}" -tvf "${testdir}"/test3_eoa2.tar > out || test_failed $LINENO +diff -u vlist3 out || test_failed $LINENO +"${TARLZ}" -q -tf "${testdir}"/test3_eoa3.tar || test_failed $LINENO +"${TARLZ}" -tvf "${testdir}"/test3_eoa4.tar > out 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +diff -u vlist3 out || test_failed $LINENO +for i in 0 2 6 ; do + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa1.tar.lz > out 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa2.tar.lz > out || + test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -q -n$i -tf "${testdir}"/test3_eoa3.tar.lz || + test_failed $LINENO $i + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa4.tar.lz > out 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa5.tar.lz > out 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i +done +rm -f out || framework_failure +# +"${TARLZ}" -q -xf "${testdir}"/test3_eoa1.tar +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -xf "${testdir}"/test3_eoa2.tar || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -xf "${testdir}"/test3_eoa3.tar || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf "${testdir}"/test3_eoa4.tar +[ $? 
= 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +# +for i in 0 2 6 ; do + "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eoa1.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -n$i -xf "${testdir}"/test3_eoa2.tar.lz || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eoa4.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eoa5.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure +done +# test3_eoa3 is extracted only once (-n0), outside the loop: do not report the stale loop index +"${TARLZ}" -n0 -xf "${testdir}"/test3_eoa3.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! 
-e baz ] || test_failed $LINENO +rm -f foo bar baz || framework_failure + +# test --list and --extract tar in tar.lz +for i in "${testdir}"/tar_in_tlz1.tar.lz "${testdir}"/tar_in_tlz2.tar.lz ; do + for j in 0 2 6 ; do + "${TARLZ}" -tf "$i" -n$j > out$j || + test_failed $LINENO "$i $j" + "${TARLZ}" -tvf "$i" -n$j > outv$j || + test_failed $LINENO "$i $j" + done + diff -u out0 out2 || test_failed $LINENO "$i" + diff -u out0 out6 || test_failed $LINENO "$i" + diff -u out2 out6 || test_failed $LINENO "$i" + diff -u outv0 outv2 || test_failed $LINENO "$i" + diff -u outv0 outv6 || test_failed $LINENO "$i" + diff -u outv2 outv6 || test_failed $LINENO "$i" + rm -f out0 out2 out6 outv0 outv2 outv6 || framework_failure + for j in 0 2 6 ; do + "${TARLZ}" -xf "$i" -n$j || test_failed $LINENO "$i $j" + cmp "${in_tar}" test.txt.tar || test_failed $LINENO "$i $j" + cmp "${test3}" test3.tar || test_failed $LINENO "$i $j" + rm -f test.txt.tar test3.tar || framework_failure + done +done + +# test --list and --extract with global headers uncompressed +for i in gh1 gh2 gh3 gh4 ; do + "${TARLZ}" -tf "${testdir}"/test3_${i}.tar > out || test_failed $LINENO $i + diff -u list3 out || test_failed $LINENO $i + "${TARLZ}" -tvf "${testdir}"/test3_${i}.tar > out || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -xf "${testdir}"/test3_${i}.tar || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz out || framework_failure +done + +# test --list and --extract with empty lzip members, global headers and +# extended tar members split among lzip members +for i in em1 em2 em3 em4 em5 em6 gh1 gh2 gh3 gh4 gh5 gh6 sm1 sm2 sm3 sm4 ; do + for j in 0 2 6 ; do + "${TARLZ}" -n$j -tf "${testdir}"/test3_${i}.tar.lz > out || + test_failed $LINENO "$i $j" + diff -u list3 out || test_failed $LINENO "$i $j" + "${TARLZ}" -n$j -tvf "${testdir}"/test3_${i}.tar.lz > 
out || + test_failed $LINENO "$i $j" + diff -u vlist3 out || test_failed $LINENO "$i $j" + done + rm -f out || framework_failure + for j in 0 2 6 ; do + "${TARLZ}" -n$j -xf "${testdir}"/test3_${i}.tar.lz || + test_failed $LINENO "$i $j" + cmp cfoo foo || test_failed $LINENO "$i $j" + cmp cbar bar || test_failed $LINENO "$i $j" + cmp cbaz baz || test_failed $LINENO "$i $j" + rm -f foo bar baz || framework_failure + done +done +rm -f list3 vlist3 || framework_failure + +printf "\ntesting --concatenate..." + +# test --concatenate compressed +cat "${in}" > out.tar.lz || framework_failure # invalid tar.lz +"${TARLZ}" -Aqf out.tar.lz "${test3_lz}" +[ $? = 2 ] || test_failed $LINENO +cat "${in_tar_lz}" > out.tar.lz || framework_failure +"${TARLZ}" -Af out.tar.lz "${test3_lz}" || test_failed $LINENO +"${TARLZ}" -xf out.tar.lz || test_failed $LINENO +cmp "${in}" test.txt || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f test.txt foo bar baz || framework_failure +touch aout.tar.lz || framework_failure # concatenate to empty file +"${TARLZ}" -Aqf aout.tar.lz "${in_tar}" +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -Af aout.tar.lz || test_failed $LINENO # concatenate nothing +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -Aqf aout.tar.lz aout.tar.lz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -Aq "${in_tar_lz}" "${test3}" > aout.tar.lz # to stdout +[ $? = 2 ] || test_failed $LINENO +cmp "${in_tar_lz}" aout.tar.lz || test_failed $LINENO +"${TARLZ}" -A "${in_tar_lz}" "${test3_lz}" > aout.tar.lz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +cat "${eoa_lz}" > aout.tar.lz || framework_failure +"${TARLZ}" -Aqf aout.tar.lz "${in_tar}" # concatenate to empty archive +[ $? 
= 2 ] || test_failed $LINENO +"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +cat "${in_tar_lz}" > aout.tar.lz || framework_failure +"${TARLZ}" -Aqf aout.tar.lz "${test3_lz}" "${test3}" +[ $? = 2 ] || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f aout.tar.lz || framework_failure +touch aout.tar.lz || framework_failure # --exclude +"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" --exclude 'test3*' || + test_failed $LINENO +"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" --exclude '*txt*' || + test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f out.tar.lz aout.tar.lz || framework_failure + +# test --concatenate uncompressed +cat "${in}" > out.tar || framework_failure # invalid tar +"${TARLZ}" -Aqf out.tar "${test3}" +[ $? = 2 ] || test_failed $LINENO +cat "${in_tar}" > out.tar || framework_failure +"${TARLZ}" -Af out.tar "${test3}" || test_failed $LINENO +"${TARLZ}" -xf out.tar || test_failed $LINENO +cmp "${in}" test.txt || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f test.txt foo bar baz || framework_failure +touch aout.tar || framework_failure # concatenate to empty file +"${TARLZ}" -Aqf aout.tar "${in_tar_lz}" +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -Af aout.tar "${in_tar}" "${test3}" || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" -Af aout.tar || test_failed $LINENO # concatenate nothing +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" -Aqf aout.tar aout.tar || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" -Aq "${in_tar}" "${test3_lz}" > aout.tar # to stdout +[ $? 
= 2 ] || test_failed $LINENO +cmp "${in_tar}" aout.tar || test_failed $LINENO +"${TARLZ}" -A "${in_tar}" "${test3}" > aout.tar || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +cat "${eoa}" > aout.tar || framework_failure # concatenate to empty archive +"${TARLZ}" -Aqf aout.tar "${in_tar_lz}" +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -Af aout.tar "${in_tar}" "${test3}" || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +cat "${in_tar}" > aout.tar || framework_failure +"${TARLZ}" -Aqf aout.tar "${test3}" "${test3_lz}" +[ $? = 2 ] || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +rm -f aout.tar || framework_failure +touch aout.tar || framework_failure # --exclude +"${TARLZ}" -Af aout.tar "${test3}" "${in_tar}" --exclude 'test3*' || + test_failed $LINENO +"${TARLZ}" -Af aout.tar "${test3}" "${in_tar}" --exclude '*txt*' || + test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +rm -f out.tar aout.tar || framework_failure + +printf "\ntesting --create..." + +# test --create +cat "${in}" > test.txt || framework_failure +"${TARLZ}" --warn-newer -0 -cf out.tar.lz test.txt || test_failed $LINENO +rm -f test.txt || framework_failure +"${TARLZ}" -xf out.tar.lz --missing-crc || test_failed $LINENO +cmp "${in}" test.txt || test_failed $LINENO +cat "${in}" > test.txt || framework_failure +"${TARLZ}" --warn-newer --uncompressed -cf out.tar test.txt || test_failed $LINENO +rm -f test.txt || framework_failure +"${TARLZ}" -xf out.tar --missing-crc || test_failed $LINENO +cmp "${in}" test.txt || test_failed $LINENO +rm -f test.txt out.tar out.tar.lz || framework_failure + +cat cfoo > foo || framework_failure +rm -f bar || framework_failure +cat cbaz > baz || framework_failure +"${TARLZ}" -0 -q -cf out.tar.lz foo bar baz +[ $? = 1 ] || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -xf out.tar.lz --missing-crc || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! 
-e bar ] || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf out.tar.lz bar +[ $? = 1 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f out.tar.lz || framework_failure + +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +"${TARLZ}" -0 -cf out.tar.lz foo bar baz --out-slots=1 || test_failed $LINENO +"${TARLZ}" -0 -q -cf aout.tar.lz foo bar aout.tar.lz baz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO # test reproducible +rm -f aout.tar.lz || framework_failure +# +"${TARLZ}" -0 -cf aout.tar.lz foo bar baz -C / || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f aout.tar.lz || framework_failure +"${TARLZ}" -0 -C / -cf aout.tar.lz -C "${objdir}"/tmp foo bar baz || + test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f aout.tar.lz || framework_failure +"${TARLZ}" --asolid -0 -cf aout.tar.lz foo bar baz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f aout.tar.lz || framework_failure +"${TARLZ}" -0 -q -cf aout.tar.lz foo/ ./bar ./baz/ || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f aout.tar.lz || framework_failure +mkdir dir1 || framework_failure +"${TARLZ}" -C dir1 -xf out.tar.lz || test_failed $LINENO +cmp cfoo dir1/foo || test_failed $LINENO +cmp cbar dir1/bar || test_failed $LINENO +cmp cbaz dir1/baz || test_failed $LINENO +rm -f aout.tar.lz foo bar baz || framework_failure +"${TARLZ}" -C dir1 -0 -cf aout.tar.lz foo bar baz || test_failed $LINENO +"${TARLZ}" -xf aout.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f aout.tar.lz foo bar baz || framework_failure +"${TARLZ}" -C dir1 -0 -c foo bar baz | "${TARLZ}" 
-x || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f dir1/foo dir1/bar dir1/baz || framework_failure +"${TARLZ}" -0 -c foo bar baz | "${TARLZ}" -C dir1 -x || test_failed $LINENO +cmp cfoo dir1/foo || test_failed $LINENO +cmp cbar dir1/bar || test_failed $LINENO +cmp cbaz dir1/baz || test_failed $LINENO +rm -f dir1/foo dir1/bar dir1/baz || framework_failure +"${TARLZ}" -0 -c foo bar baz | "${TARLZ}" -x foo bar baz -C dir1 || + test_failed $LINENO +cmp cfoo dir1/foo || test_failed $LINENO +cmp cbar dir1/bar || test_failed $LINENO +cmp cbaz dir1/baz || test_failed $LINENO +rm -f foo dir1/bar baz || framework_failure +"${TARLZ}" -0 -cf aout.tar.lz -C dir1 foo -C .. bar -C dir1 baz || + test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -0 -cf aout.tar.lz dir1/foo dir1/baz || test_failed $LINENO +rm -rf dir1 bar || framework_failure +"${TARLZ}" -xf aout.tar.lz dir1 || test_failed $LINENO +cmp cfoo dir1/foo || test_failed $LINENO +cmp cbaz dir1/baz || test_failed $LINENO +rm -rf dir1 || framework_failure +rm -f out.tar.lz aout.tar.lz || framework_failure + +# test --create --exclude +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +"${TARLZ}" -0 -cf out.tar.lz foo bar baz --exclude 'ba?' || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -xf out.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f out.tar.lz foo bar baz || framework_failure +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +"${TARLZ}" --un -cf out.tar foo bar baz --exclude 'ba*' || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -xf out.tar || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! 
-e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f out.tar foo bar baz || framework_failure + +# test --create --mtime +dates='@-9223372036854775808 @-9223372036854775807 + -2147481748-12-31T23:59:59 -1970-01-01T00:00:00 + 0000-01-01T00:00:00 0000-01-01T00:00:01 0000-01-02T00:00:00 + 1697-10-17T11:03:27 1697-10-17T11:03:28 1697-10-17T11:03:29 + 1833-11-24T17:31:43 1833-11-24T17:31:44 1833-11-24T17:31:45 + 1901-12-13T20:45:51 1901-12-13T20:45:52 1901-12-13T20:45:53 + 1901-12-14T20:45:51 + 1969-12-31T23:59:58 1969-12-31T23:59:59 + 1970-01-01T00:00:00 1970-01-01T00:00:01 @0 + 2038-01-18T03:14:07 2038-01-19T03:14:07 2038-01-19T03:14:08 + 2106-02-07T06:28:15 2106-02-07T06:28:16 + 2242-03-16T12:56:31 2242-03-16T12:56:32 @8589934591 @8589934592 + 9999-12-31T23:59:58 9999-12-31T23:59:59 + 2147483647-12-31T23:59:59 @9223372036854775807' +touch -d 2022-01-05T12:22:13 bar || framework_failure +for i in ${dates} @-8Ei '2017-10-01 09:00:00' '2017-10-1 9:0:0' \ + '2017-10-01 09:00' '2017-10-01 09' 2017-10-01 ./bar ; do + touch foo || framework_failure + "${TARLZ}" --un -cf out.tar --mtime="$i" foo || test_failed $LINENO "$i" + "${TARLZ}" -q -df out.tar && test_failed $LINENO "$i" + "${TARLZ}" -xf out.tar || test_failed $LINENO "$i" + "${TARLZ}" -df out.tar --ignore-overflow || test_failed $LINENO "$i" +done +rm -f out.tar foo bar || framework_failure + +mkdir dir || framework_failure +for i in ${dates} ; do + # Skip a time stamp $i if it's out of range for this platform, + # or if it uses a notation that this platform does not recognize. + touch -d $i dir/f$i >/dev/null 2>&1 || continue +done +"${TARLZ}" --uncompressed -cf out.tar dir || test_failed $LINENO +"${TARLZ}" -df out.tar || test_failed $LINENO +rm -rf out.tar dir || framework_failure + +printf "\ntesting --diff..." 
+ +# test --diff +"${TARLZ}" -xf "${test3_lz}" || test_failed $LINENO +"${TARLZ}" --uncompressed -cf out.tar foo || test_failed $LINENO +"${TARLZ}" --uncompressed -cf aout.tar foo --anonymous || test_failed $LINENO +if cmp out.tar aout.tar > /dev/null ; then + printf "\nwarning: '--diff' test can't be run as root.\n" +else + for i in 0 2 6 ; do + "${TARLZ}" -n$i -xf "${test3_lz}" || test_failed $LINENO $i + "${TARLZ}" -n$i -df "${test3_lz}" > out$i + [ $? = 1 ] || test_failed $LINENO $i + "${TARLZ}" -n$i -df "${test3_lz}" --ignore-ids || test_failed $LINENO $i + "${TARLZ}" -n$i -df "${test3_lz}" --exclude '*' || test_failed $LINENO $i + "${TARLZ}" -n$i -df "${in_tar_lz}" --exclude '*' || test_failed $LINENO $i + rm -f bar || framework_failure + "${TARLZ}" -n$i -df "${test3_lz}" foo baz --ignore-ids || + test_failed $LINENO $i + "${TARLZ}" -n$i -df "${test3_lz}" --exclude bar --ignore-ids || + test_failed $LINENO $i + rm -f foo baz || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" || test_failed $LINENO $i + "${TARLZ}" -q -n$i -df "${test3dir_lz}" --ignore-ids || + test_failed $LINENO $i + "${TARLZ}" -q -n$i -df "${test3dir_lz}" dir --ignore-ids || + test_failed $LINENO $i + "${TARLZ}" -n$i -df "${test3_lz}" --ignore-ids -C dir || + test_failed $LINENO $i + rm -rf dir || framework_failure + done + cmp out0 out2 || test_failed $LINENO + cmp out0 out6 || test_failed $LINENO + rm -f out0 out2 out6 || framework_failure +fi +rm -f out.tar aout.tar foo bar baz || framework_failure + +printf "\ntesting --delete..." + +# test --delete +cat "${in}" > out.tar || framework_failure # invalid tar +"${TARLZ}" -q -f out.tar --delete foo +[ $? = 2 ] || test_failed $LINENO +rm -f out.tar || framework_failure +cat "${in}" > out.tar.lz || framework_failure # invalid tar.lz +"${TARLZ}" -q -f out.tar.lz --delete foo +[ $? = 2 ] || test_failed $LINENO +cat "${in_lz}" > out.tar.lz || framework_failure # invalid tar.lz +"${TARLZ}" -q -f out.tar.lz --delete foo +[ $? 
= 2 ] || test_failed $LINENO +rm -f out.tar.lz || framework_failure + +for e in "" .lz ; do + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete test.txt || test_failed $LINENO $e + cmp "${test3}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete || test_failed $LINENO $e # delete nothing + cmp "${test3}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --delete nx_file + [ $? = 1 ] || test_failed $LINENO $e + cmp "${test3}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --delete test.txt || test_failed $LINENO $e + cmp "${test3dir}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --delete dir || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del dir/foo dir/bar dir/baz || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del dir/foo dir/baz || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e > /dev/null && test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del dir/bar || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete foo bar baz || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --del test.txt foo bar baz || test_failed $LINENO $e + cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e + 
"${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + for i in test.txt foo bar baz ; do + "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i" + done + cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + for i in baz bar foo test.txt ; do + "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i" + done + cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + for i in foo bar test.txt baz ; do + "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i" + done + cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${t155}"$e "${test3}"$e > out.tar$e || + test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --del baz foo test.txt bar || test_failed $LINENO $e + cmp "${t155}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete link || test_failed $LINENO $e + "${TARLZ}" -q -tf out.tar$e || test_failed $LINENO $e + cmp "${t155}"$e out.tar$e > /dev/null && test_failed $LINENO $e + rm -f out.tar$e || framework_failure +done + +# test --delete individual member after collective member +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +cat "${in}" > test.txt || framework_failure +"${TARLZ}" -0 -cf out.tar.lz foo bar baz --asolid || test_failed $LINENO +"${TARLZ}" -0 -rf out.tar.lz test.txt || test_failed $LINENO +rm -f foo bar baz test.txt || framework_failure +for i in foo bar baz ; do + "${TARLZ}" -q -f out.tar.lz --delete $i + [ $? = 2 ] || test_failed $LINENO $i +done +"${TARLZ}" -f out.tar.lz --delete test.txt || test_failed $LINENO +"${TARLZ}" -xf out.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +[ ! 
-e test.txt ] || test_failed $LINENO +rm -f out.tar.lz foo bar baz test.txt || framework_failure + +# test --delete with empty lzip member, global header +for i in 1 2 3 4 5 6 ; do + cat "${testdir}"/test3_em${i}.tar.lz > out.tar.lz || framework_failure + for j in foo bar baz ; do + "${TARLZ}" -f out.tar.lz --delete $j || test_failed $LINENO "$i $j" + done + rm -f out.tar.lz || framework_failure +done +cat "${testdir}"/test3_gh5.tar.lz > out.tar.lz || framework_failure +for i in foo bar baz ; do + "${TARLZ}" -f out.tar.lz --delete $i || test_failed $LINENO $i +done +rm -f out.tar.lz || framework_failure +for i in 1 2 3 4 ; do + cat "${testdir}"/test3_gh${i}.tar > out.tar || framework_failure + for j in foo bar baz ; do + "${TARLZ}" -f out.tar --delete $j || test_failed $LINENO "$i $j" + done + rm -f out.tar || framework_failure +done + +printf "\ntesting --dereference..." + +# test --dereference +touch dummy_file || framework_failure +if ln dummy_file dummy_link 2> /dev/null && + ln -s dummy_file dummy_slink 2> /dev/null ; then + ln_works=yes +else + printf "\nwarning: skipping link test: 'ln' does not work on your system.\n" +fi +rm -f dummy_slink dummy_link dummy_file || framework_failure +# +if [ "${ln_works}" = yes ] ; then + mkdir dir || framework_failure + cat cfoo > dir/foo || framework_failure + cat cbar > dir/bar || framework_failure + cat cbaz > dir/baz || framework_failure + ln -s dir dir_link || framework_failure + "${TARLZ}" -0 -cf out1 dir_link || test_failed $LINENO + "${TARLZ}" --un -cf out2 dir_link || test_failed $LINENO + "${TARLZ}" -0 -n0 -cf out3 dir_link || test_failed $LINENO + "${TARLZ}" -0 -h -cf hout1 dir_link || test_failed $LINENO + "${TARLZ}" --un -h -cf hout2 dir_link || test_failed $LINENO + "${TARLZ}" -0 -n0 -h -cf hout3 dir_link || test_failed $LINENO + rm -rf dir dir_link || framework_failure + for i in 1 2 3 ; do + "${TARLZ}" -xf out$i --exclude='dir_link/*' dir_link || + test_failed $LINENO $i # Cygwin stores dir_link/* + [ -h 
dir_link ] || test_failed $LINENO $i + "${TARLZ}" -q -tf out$i dir_link/foo && cyg_symlink $LINENO $i + "${TARLZ}" -q -tf out$i dir_link/bar && cyg_symlink $LINENO $i + "${TARLZ}" -q -tf out$i dir_link/baz && cyg_symlink $LINENO $i + rm -rf dir_link out$i || framework_failure + "${TARLZ}" -xf hout$i || test_failed $LINENO $i + [ -d dir_link ] || test_failed $LINENO $i + cmp cfoo dir_link/foo || test_failed $LINENO $i + cmp cbar dir_link/bar || test_failed $LINENO $i + cmp cbaz dir_link/baz || test_failed $LINENO $i + rm -rf dir_link hout$i || framework_failure + done +fi + +printf "\ntesting --append..." + +# test --append compressed +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +"${TARLZ}" -0 -cf out.tar.lz foo bar baz --out-slots=1024 || test_failed $LINENO +"${TARLZ}" -0 -cf nout.tar.lz foo bar baz --no-solid || test_failed $LINENO +"${TARLZ}" -0 -cf aout.tar.lz foo || test_failed $LINENO +"${TARLZ}" -0 -rf aout.tar.lz bar baz --no-solid || test_failed $LINENO +cmp nout.tar.lz aout.tar.lz || test_failed $LINENO +rm -f nout.tar.lz aout.tar.lz || framework_failure +touch aout.tar || framework_failure # wrong extension empty file +"${TARLZ}" -0 -rf aout.tar foo bar baz || test_failed $LINENO +cmp out.tar.lz aout.tar || test_failed $LINENO +rm -f aout.tar || framework_failure +touch aout.tar.lz || framework_failure # append to empty file +"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -0 -rf aout.tar.lz || test_failed $LINENO # append nothing +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -0 -rf aout.tar.lz -C nx_dir || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -0 -q -rf aout.tar.lz nx_file +[ $? 
= 1 ] || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -0 -q -rf aout.tar.lz aout.tar.lz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -0 -r foo bar baz > aout.tar.lz || test_failed $LINENO # to stdout +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz # wrong extension archive +[ $? = 2 ] || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +cat "${eoa_lz}" > aout.tar.lz || framework_failure # append to empty archive +"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz # wrong extension empty archive +[ $? = 2 ] || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f out.tar.lz aout.tar.lz || framework_failure + +# test --append --uncompressed +"${TARLZ}" --un -cf out.tar foo bar baz || test_failed $LINENO +"${TARLZ}" --un -cf aout.tar foo || test_failed $LINENO +"${TARLZ}" --un -rf aout.tar foo bar baz --exclude foo || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +rm -f aout.tar || framework_failure +touch aout.tar.lz empty || framework_failure # wrong extension empty file +"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz +[ $? = 2 ] || test_failed $LINENO +cmp aout.tar.lz empty || test_failed $LINENO +rm -f aout.tar.lz empty || framework_failure +touch aout.tar || framework_failure # append to empty file +"${TARLZ}" --un -rf aout.tar foo bar baz || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" --un -rf aout.tar || test_failed $LINENO # append nothing +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" --un -rf aout.tar -C nx_dir || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" --un -q -rf aout.tar nx_file +[ $? 
= 1 ] || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" --un -q -rf aout.tar aout.tar || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" --un -r foo bar baz > aout.tar || test_failed $LINENO # to stdout +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" -0 -q -rf aout.tar foo bar baz # wrong extension archive +[ $? = 2 ] || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +cat "${eoa}" > aout.tar || framework_failure # append to empty archive +"${TARLZ}" --un -rf aout.tar foo bar baz || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" -0 -q -rf aout.tar foo bar baz # wrong extension empty archive +[ $? = 2 ] || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +rm -f out.tar aout.tar || framework_failure + +# test --append to solid archive +"${TARLZ}" --solid -q -0 -cf out.tar.lz "${in}" foo bar || test_failed $LINENO +"${TARLZ}" -q -tf out.tar.lz || test_failed $LINENO # compressed seekable +cat out.tar.lz > aout.tar.lz || framework_failure +for i in --asolid --bsolid --dsolid --solid -0 ; do + "${TARLZ}" $i -q -rf out.tar.lz baz + [ $? = 2 ] || test_failed $LINENO $i + cmp out.tar.lz aout.tar.lz || test_failed $LINENO $i +done +rm -f out.tar.lz aout.tar.lz || framework_failure +for i in --asolid --bsolid --dsolid -0 ; do + for j in --asolid --bsolid --dsolid --solid -0 ; do + "${TARLZ}" $i -0 -cf out.tar.lz foo || test_failed $LINENO "$i $j" + "${TARLZ}" $j -0 -rf out.tar.lz bar baz || test_failed $LINENO "$i $j" + rm -f foo bar baz || framework_failure + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO "$i $j" + cmp cfoo foo || test_failed $LINENO "$i $j" + cmp cbar bar || test_failed $LINENO "$i $j" + cmp cbaz baz || test_failed $LINENO "$i $j" + rm -f out.tar.lz || framework_failure + done +done +rm -f foo bar baz || framework_failure + +printf "\ntesting dirs and links..." 
+ +# test -c -d -x on directories and links +mkdir dir1 || framework_failure +"${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO +rmdir dir1 || framework_failure +"${TARLZ}" -xf out.tar.lz || test_failed $LINENO +[ -d dir1 ] || test_failed $LINENO +rmdir dir1 +rm -f out.tar.lz || framework_failure +mkdir dir1 || framework_failure +"${TARLZ}" --uncompressed -cf out.tar dir1 || test_failed $LINENO +rmdir dir1 || framework_failure +"${TARLZ}" -xf out.tar || test_failed $LINENO +[ -d dir1 ] || test_failed $LINENO +rmdir dir1 +rm -f out.tar || framework_failure + +if [ "${ln_works}" = yes ] ; then + name_100=name_100_bytes_long_nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn + path_100=dir1/dir2/dir3/path_100_bytes_long_nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn + path_106=dir1/dir2/dir3/path_longer_than_100_bytes_nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn + mkdir dir1 || framework_failure + mkdir dir1/dir2 || framework_failure + mkdir dir1/dir2/dir3 || framework_failure + cat "${in}" > dir1/dir2/dir3/in || framework_failure + ln dir1/dir2/dir3/in dir1/dir2/dir3/"${name_100}" || framework_failure + ln dir1/dir2/dir3/in "${path_100}" || framework_failure + ln dir1/dir2/dir3/in "${path_106}" || framework_failure + ln -s dir2/ dir1/dir2_link || framework_failure + ln -s in dir1/dir2/dir3/link || framework_failure + ln -s "${name_100}" dir1/dir2/dir3/link_100 || framework_failure + "${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO + "${TARLZ}" -df out.tar.lz || test_failed $LINENO + rm -rf dir1 || framework_failure + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO + "${TARLZ}" -df out.tar.lz || test_failed $LINENO + cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO + cmp "${in}" dir1/dir2_link/dir3/in || test_failed $LINENO + cmp "${in}" dir1/dir2/dir3/"${name_100}" || test_failed $LINENO + cmp "${in}" "${path_100}" || test_failed $LINENO + cmp "${in}" "${path_106}" || test_failed 
$LINENO + cmp "${in}" dir1/dir2/dir3/link || test_failed $LINENO + cmp "${in}" dir1/dir2/dir3/link_100 || test_failed $LINENO + rm -f dir1/dir2/dir3/in || framework_failure + cmp "${in}" dir1/dir2/dir3/link 2> /dev/null && test_failed $LINENO + cmp "${in}" dir1/dir2/dir3/link_100 || test_failed $LINENO + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO + rm -f out.tar.lz || framework_failure + cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO + cmp "${in}" dir1/dir2/dir3/link || test_failed $LINENO + "${TARLZ}" -0 -q -c ../tmp/dir1 | "${TARLZ}" -x || test_failed $LINENO + diff -ru tmp/dir1 dir1 || test_failed $LINENO + rm -rf tmp dir1 || framework_failure + # test -c -d -x on dangling (broken) symlinks with trailing slashes + "${TARLZ}" -xf "${testdir}"/ts_in_link.tar.lz || test_failed $LINENO + "${TARLZ}" -df "${testdir}"/ts_in_link.tar.lz --ignore-ids || + test_failed $LINENO + "${TARLZ}" -0 -cf out.tar.lz link1 link2 link3 link4 || test_failed $LINENO + "${TARLZ}" -df out.tar.lz || test_failed $LINENO + rm -f out.tar.lz link1 link2 link3 link4 || framework_failure +fi + +printf "\ntesting long names..." 
+ +"${TARLZ}" -q -tf "${t155}" || test_failed $LINENO +"${TARLZ}" -q -tf "${t155_lz}" || test_failed $LINENO +if [ "${ln_works}" = yes ] ; then + mkdir dir1 || framework_failure + "${TARLZ}" -C dir1 -xf "${t155}" || test_failed $LINENO + mkdir dir2 || framework_failure + "${TARLZ}" -C dir2 -xf "${t155_lz}" || test_failed $LINENO + diff -ru dir1 dir2 || test_failed $LINENO + "${TARLZ}" -cf out.tar.lz dir2 || test_failed $LINENO + rm -rf dir2 || framework_failure + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO + diff -ru dir1 dir2 || test_failed $LINENO + rmdir dir2 2> /dev/null && test_failed $LINENO + rmdir dir1 2> /dev/null && test_failed $LINENO + rm -rf out.tar.lz dir2 dir1 || framework_failure +fi + +"${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \ + -e very_long_owner_name_of_32_chars/very_long_group_name_of_32_chars || + test_failed $LINENO +"${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \ + -e very_long_owner_name_of_32_charsvery_long_group_name_of_32_chars && + test_failed $LINENO +"${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \ + -e very_long_group_name_of_32_chars/very_long_group_name_of_32_chars && + test_failed $LINENO +"${TARLZ}" -xf "${testdir}"/ug32chars.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +rm -f foo || framework_failure +"${TARLZ}" -tvf "${testdir}"/ug32767.tar.lz | grep -q -e 32767/32767 || + test_failed $LINENO +"${TARLZ}" -xf "${testdir}"/ug32767.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +rm -f foo || framework_failure + +printf "\ntesting --compress..." 
+ +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +cat "${in}" > test.txt || framework_failure +"${TARLZ}" --un -cf out.tar test.txt foo bar baz test.txt || test_failed $LINENO +"${TARLZ}" --un -cf out3.tar foo bar baz || test_failed $LINENO +cat out.tar > outz.tar || framework_failure +cat out3.tar > out3z.tar || framework_failure +# +"${TARLZ}" -0 -z outz.tar out3z.tar || test_failed $LINENO +"${TARLZ}" -q -tf outz.tar.lz || test_failed $LINENO +"${TARLZ}" -q -tf out3z.tar.lz || test_failed $LINENO +cat outz.tar.lz > out || test_failed $LINENO +cat out3z.tar.lz > out3 || test_failed $LINENO +rm -f out3z.tar.lz || framework_failure +"${TARLZ}" -q -0 -z outz.tar out3z.tar # outz.tar.lz exists +[ $? = 1 ] || test_failed $LINENO +cmp out outz.tar.lz || test_failed $LINENO +cmp out3 out3z.tar.lz || test_failed $LINENO +if [ "${ln_works}" = yes ] ; then + ln -s outz.tar loutz.tar || framework_failure + "${TARLZ}" -0 -z loutz.tar || test_failed $LINENO + cmp loutz.tar.lz outz.tar.lz || test_failed $LINENO + rm -f loutz.tar.lz loutz.tar || framework_failure +fi +rm -f out out3 outz.tar.lz out3z.tar.lz || framework_failure +# +for i in --solid --no-solid ; do + "${TARLZ}" -0 -n0 $i -cf out.tar.lz test.txt foo bar baz test.txt || test_failed $LINENO $i + "${TARLZ}" -0 -z -o - $i out.tar | cmp out.tar.lz - || test_failed $LINENO $i + "${TARLZ}" -0 -n0 $i -cf out3.tar.lz foo bar baz || test_failed $LINENO $i + "${TARLZ}" -0 -z -o - $i out3.tar | cmp out3.tar.lz - || test_failed $LINENO $i + "${TARLZ}" -0 -z $i outz.tar out3z.tar || test_failed $LINENO $i + cmp out.tar.lz outz.tar.lz || test_failed $LINENO $i + cmp out3.tar.lz out3z.tar.lz || test_failed $LINENO $i + rm -f outz.tar.lz out3z.tar.lz || framework_failure +done +# +"${TARLZ}" -0 -B8KiB -n0 --bsolid -cf out.tar.lz test.txt foo bar baz test.txt || test_failed $LINENO +"${TARLZ}" -0 -B8KiB -z -o - --bsolid out.tar | cmp out.tar.lz - || test_failed 
$LINENO +"${TARLZ}" -0 -B8KiB -z -o out --bsolid out.tar || test_failed $LINENO +cmp out.tar.lz out || test_failed $LINENO +"${TARLZ}" -0 -B8KiB -z --bsolid outz.tar || test_failed $LINENO +cmp out.tar.lz outz.tar.lz || test_failed $LINENO +rm -f out outz.tar.lz || framework_failure +# +"${TARLZ}" -0 -n0 --asolid -cf out.tar.lz test.txt foo bar baz test.txt || test_failed $LINENO +"${TARLZ}" -0 -n0 --asolid -cf out3.tar.lz foo bar baz || test_failed $LINENO +for i in --asolid --bsolid --dsolid ; do + cat out.tar | "${TARLZ}" -0 -z $i | cmp out.tar.lz - || test_failed $LINENO $i + "${TARLZ}" -0 -z -o out $i out.tar || test_failed $LINENO $i + cmp out.tar.lz out || test_failed $LINENO $i + "${TARLZ}" -0 -z $i outz.tar out3z.tar || test_failed $LINENO $i + cmp out.tar.lz outz.tar.lz || test_failed $LINENO $i + cmp out3.tar.lz out3z.tar.lz || test_failed $LINENO $i + rm -f out outz.tar.lz out3z.tar.lz || framework_failure +done +# concatenate and compress +"${TARLZ}" --un -cf foo.tar foo || test_failed $LINENO +"${TARLZ}" --un -cf bar.tar bar || test_failed $LINENO +"${TARLZ}" --un -cf baz.tar baz || test_failed $LINENO +"${TARLZ}" -A foo.tar bar.tar baz.tar | "${TARLZ}" -0 -z -o foobarbaz.tar.lz || + test_failed $LINENO +cmp out3.tar.lz foobarbaz.tar.lz || test_failed $LINENO +# compress and concatenate +"${TARLZ}" -0 -z foo.tar bar.tar baz.tar || test_failed $LINENO +"${TARLZ}" -A foo.tar.lz bar.tar.lz baz.tar.lz > foobarbaz.tar.lz || + test_failed $LINENO +"${TARLZ}" -0 -n0 --no-solid -c foo bar baz | cmp foobarbaz.tar.lz - || + test_failed $LINENO +rm -f foo bar baz test.txt out.tar.lz out.tar outz.tar foobarbaz.tar.lz \ + out3.tar out3.tar.lz out3z.tar foo.tar bar.tar baz.tar \ + foo.tar.lz bar.tar.lz baz.tar.lz || framework_failure + +printf "\ntesting bad input..." + +# test --extract ".." 
+mkdir dir1 || framework_failure +cd dir1 || framework_failure +for i in 0 2 ; do # try serial and parallel decoders + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot1.tar.lz || test_failed $LINENO $i + [ ! -e ../dir ] || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot2.tar.lz || test_failed $LINENO $i + [ ! -e ../dir ] || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot3.tar.lz || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot4.tar.lz || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot5.tar.lz || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i +done +cd .. || framework_failure +rm -rf dir1 || framework_failure + +# test --list and --extract truncated tar +dd if="${in_tar}" of=truncated.tar bs=1000 count=1 2> /dev/null +"${TARLZ}" -q -tf truncated.tar > /dev/null +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -q -xf truncated.tar +[ $? = 2 ] || test_failed $LINENO +[ ! -e test.txt ] || test_failed $LINENO +rm -f truncated.tar || framework_failure + +# test --delete with split 'bar' tar member +for i in 1 2 3 4 ; do + cat "${testdir}"/test3_sm${i}.tar.lz > out.tar.lz || framework_failure + for j in bar baz ; do + "${TARLZ}" -q -f out.tar.lz --delete $j + [ $? = 2 ] || test_failed $LINENO "$i $j" + done + cmp "${testdir}"/test3_sm${i}.tar.lz out.tar.lz || test_failed $LINENO $i + "${TARLZ}" -q -f out.tar.lz --delete foo + [ $? = 2 ] || test_failed $LINENO $i + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO $i + [ ! 
-e foo ] || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f out.tar.lz foo bar baz || framework_failure +done + +# test --list and --extract format violations +if [ "${ln_works}" = yes ] ; then + mkdir dir1 || framework_failure + "${TARLZ}" -C dir1 -xf "${t155}" || test_failed $LINENO +fi +for i in 1 2 3 ; do + "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar + [ $? = 2 ] || test_failed $LINENO $i + "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar --permissive || + test_failed $LINENO $i + if [ "${ln_works}" = yes ] ; then + mkdir dir2 || framework_failure + "${TARLZ}" -C dir2 -xf "${testdir}"/t155_fv${i}.tar --permissive || + test_failed $LINENO $i + diff -ru dir1 dir2 || test_failed $LINENO $i + rm -rf dir2 || framework_failure + fi +done +for i in 1 2 3 4 5 6 ; do + "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar.lz --permissive || + test_failed $LINENO $i + if [ "${ln_works}" = yes ] ; then + mkdir dir2 || framework_failure + "${TARLZ}" -n4 -C dir2 -xf "${testdir}"/t155_fv${i}.tar.lz --permissive || + test_failed $LINENO $i + diff -ru dir1 dir2 || test_failed $LINENO $i + rm -rf dir2 || framework_failure + fi +done +if [ "${ln_works}" = yes ] ; then rm -rf dir1 || framework_failure ; fi + +for i in "${testdir}"/test3_nn.tar "${testdir}"/test3_nn.tar.lz ; do + "${TARLZ}" -q -n0 -tf "$i" || test_failed $LINENO $i + "${TARLZ}" -q -n4 -tf "$i" || test_failed $LINENO $i + "${TARLZ}" -q -n0 -xf "$i" || test_failed $LINENO $i + "${TARLZ}" -n0 -df "$i" --ignore-ids || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + [ ! -e bar ] || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n4 -xf "$i" || test_failed $LINENO $i + "${TARLZ}" -n4 -df "$i" --ignore-ids || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + [ ! 
-e bar ] || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure +done + +printf "\ntesting --keep-damaged..." + +# test --extract and --keep-damaged compressed +rm -f test.txt || framework_failure +for i in "${inbad1}" "${inbad2}" ; do + "${TARLZ}" -q -xf "${i}.tar.lz" + [ $? = 2 ] || test_failed $LINENO "$i" + [ ! -e test.txt ] || test_failed $LINENO "$i" + rm -f test.txt || framework_failure + "${TARLZ}" -q -n0 -xf "${i}.tar.lz" --keep-damaged + [ $? = 2 ] || test_failed $LINENO "$i" + [ -e test.txt ] || test_failed $LINENO "$i" + cmp "$i" test.txt 2> /dev/null || test_failed $LINENO $i + rm -f test.txt || framework_failure +done +# +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad1_lz}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad2_lz}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad3_lz}" +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad3_lz}" --keep-damaged +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar 2> /dev/null || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad4_lz}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad4_lz}" --keep-damaged +[ $? = 2 ] || test_failed $LINENO +[ ! 
-e foo ] || test_failed $LINENO +cmp cbar bar 2> /dev/null || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad5_lz}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad5_lz}" --keep-damaged +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo 2> /dev/null || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad6_lz}" +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO + +# test --extract and --keep-damaged uncompressed +rm -f test.txt || framework_failure +"${TARLZ}" -q -xf "${inbad1}.tar" +[ $? = 2 ] || test_failed $LINENO +[ ! -e test.txt ] || test_failed $LINENO +rm -f test.txt || framework_failure +"${TARLZ}" -q -xf "${inbad1}.tar" --keep-damaged +[ $? = 2 ] || test_failed $LINENO +[ -e test.txt ] || test_failed $LINENO +cmp "${inbad1}" test.txt 2> /dev/null || test_failed $LINENO +rm -f test.txt || framework_failure +# +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf "${bad1}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf "${bad2}" +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf "${bad3}" +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf "${bad4}" +[ $? 
= 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf "${bad5}" +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f cfoo cbar cbaz foo bar baz || framework_failure +# +rm -f test3.tar.lz || framework_failure +"${TARLZ}" -q -xf "${tlzit1}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +[ ! -e test3.tar.lz ] || test_failed $LINENO +rm -f foo bar baz test3.tar.lz || framework_failure +"${TARLZ}" -q -xf "${tlzit2}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +cmp "${test3_lz}" test3.tar.lz || test_failed $LINENO +rm -f foo bar baz test3.tar.lz || framework_failure + +echo +if [ ${fail} = 0 ] ; then + echo "tests completed successfully." + cd "${objdir}" && rm -r tmp +else + echo "tests failed." 
+fi +exit ${fail} diff --git a/testsuite/dotdot1.tar.lz b/testsuite/dotdot1.tar.lz Binary files differnew file mode 100644 index 0000000..9884d9f --- /dev/null +++ b/testsuite/dotdot1.tar.lz diff --git a/testsuite/dotdot2.tar.lz b/testsuite/dotdot2.tar.lz Binary files differnew file mode 100644 index 0000000..a60b898 --- /dev/null +++ b/testsuite/dotdot2.tar.lz diff --git a/testsuite/dotdot3.tar.lz b/testsuite/dotdot3.tar.lz Binary files differnew file mode 100644 index 0000000..163fb5c --- /dev/null +++ b/testsuite/dotdot3.tar.lz diff --git a/testsuite/dotdot4.tar.lz b/testsuite/dotdot4.tar.lz Binary files differnew file mode 100644 index 0000000..8c6a0ee --- /dev/null +++ b/testsuite/dotdot4.tar.lz diff --git a/testsuite/dotdot5.tar.lz b/testsuite/dotdot5.tar.lz Binary files differnew file mode 100644 index 0000000..a62cd18 --- /dev/null +++ b/testsuite/dotdot5.tar.lz diff --git a/testsuite/eoa_blocks.tar b/testsuite/eoa_blocks.tar Binary files differnew file mode 100644 index 0000000..06d7405 --- /dev/null +++ b/testsuite/eoa_blocks.tar diff --git a/testsuite/eoa_blocks.tar.lz b/testsuite/eoa_blocks.tar.lz Binary files differnew file mode 100644 index 0000000..328273c --- /dev/null +++ b/testsuite/eoa_blocks.tar.lz diff --git a/testsuite/rbar b/testsuite/rbar new file mode 100644 index 0000000..5716ca5 --- /dev/null +++ b/testsuite/rbar @@ -0,0 +1 @@ +bar diff --git a/testsuite/rbaz b/testsuite/rbaz new file mode 100644 index 0000000..7601807 --- /dev/null +++ b/testsuite/rbaz @@ -0,0 +1 @@ +baz diff --git a/testsuite/rfoo b/testsuite/rfoo new file mode 100644 index 0000000..257cc56 --- /dev/null +++ b/testsuite/rfoo @@ -0,0 +1 @@ +foo diff --git a/testsuite/t155.tar b/testsuite/t155.tar Binary files differnew file mode 100644 index 0000000..f2b8a4e --- /dev/null +++ b/testsuite/t155.tar diff --git a/testsuite/t155.tar.lz b/testsuite/t155.tar.lz Binary files differnew file mode 100644 index 0000000..edc7f04 --- /dev/null +++ b/testsuite/t155.tar.lz diff --git 
a/testsuite/t155_fv1.tar b/testsuite/t155_fv1.tar Binary files differ new file mode 100644 index 0000000..1ef64c3 --- /dev/null +++ b/testsuite/t155_fv1.tar diff --git a/testsuite/t155_fv1.tar.lz b/testsuite/t155_fv1.tar.lz Binary files differ new file mode 100644 index 0000000..896925e --- /dev/null +++ b/testsuite/t155_fv1.tar.lz diff --git a/testsuite/t155_fv2.tar b/testsuite/t155_fv2.tar Binary files differ new file mode 100644 index 0000000..f732b30 --- /dev/null +++ b/testsuite/t155_fv2.tar diff --git a/testsuite/t155_fv2.tar.lz b/testsuite/t155_fv2.tar.lz Binary files differ new file mode 100644 index 0000000..b380105 --- /dev/null +++ b/testsuite/t155_fv2.tar.lz diff --git a/testsuite/t155_fv3.tar b/testsuite/t155_fv3.tar Binary files differ new file mode 100644 index 0000000..fe5db13 --- /dev/null +++ b/testsuite/t155_fv3.tar diff --git a/testsuite/t155_fv3.tar.lz b/testsuite/t155_fv3.tar.lz Binary files differ new file mode 100644 index 0000000..aa24c0a --- /dev/null +++ b/testsuite/t155_fv3.tar.lz diff --git a/testsuite/t155_fv4.tar.lz b/testsuite/t155_fv4.tar.lz Binary files differ new file mode 100644 index 0000000..e3ae9c3 --- /dev/null +++ b/testsuite/t155_fv4.tar.lz diff --git a/testsuite/t155_fv5.tar.lz b/testsuite/t155_fv5.tar.lz Binary files differ new file mode 100644 index 0000000..966015a --- /dev/null +++ b/testsuite/t155_fv5.tar.lz diff --git a/testsuite/t155_fv6.tar.lz b/testsuite/t155_fv6.tar.lz Binary files differ new file mode 100644 index 0000000..bc83237 --- /dev/null +++ b/testsuite/t155_fv6.tar.lz diff --git a/testsuite/tar_in_tlz1.tar.lz b/testsuite/tar_in_tlz1.tar.lz Binary files differ new file mode 100644 index 0000000..bf04f25 --- /dev/null +++ b/testsuite/tar_in_tlz1.tar.lz diff --git a/testsuite/tar_in_tlz2.tar.lz b/testsuite/tar_in_tlz2.tar.lz Binary files differ new file mode 100644 index 0000000..de8453b --- /dev/null +++ b/testsuite/tar_in_tlz2.tar.lz diff --git a/testsuite/test.txt b/testsuite/test.txt new file mode 100644 index
0000000..9196a3a --- /dev/null +++ b/testsuite/test.txt @@ -0,0 +1,676 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) <year> <name of author> + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. 
If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. + GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) <year> <name of author>
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz Binary files differnew file mode 100644 index 0000000..46f98a7 --- /dev/null +++ b/testsuite/test.txt.lz diff --git a/testsuite/test.txt.tar b/testsuite/test.txt.tar Binary files differnew file mode 100644 index 0000000..d687b43 --- /dev/null +++ b/testsuite/test.txt.tar diff --git a/testsuite/test.txt.tar.lz b/testsuite/test.txt.tar.lz Binary files differnew file mode 100644 index 0000000..306eeeb --- /dev/null +++ b/testsuite/test.txt.tar.lz diff --git a/testsuite/test3.tar b/testsuite/test3.tar Binary files differnew file mode 100644 index 0000000..d58fb45 --- /dev/null +++ b/testsuite/test3.tar diff --git a/testsuite/test3.tar.lz b/testsuite/test3.tar.lz Binary files differnew file mode 100644 index 0000000..779ace4 --- /dev/null +++ b/testsuite/test3.tar.lz diff --git a/testsuite/test3_bad1.tar b/testsuite/test3_bad1.tar Binary files differnew file mode 100644 index 0000000..005b6a3 --- /dev/null +++ b/testsuite/test3_bad1.tar diff --git a/testsuite/test3_bad1.tar.lz b/testsuite/test3_bad1.tar.lz Binary files differnew file mode 100644 index 0000000..9f5d40f --- /dev/null +++ b/testsuite/test3_bad1.tar.lz diff --git a/testsuite/test3_bad2.tar b/testsuite/test3_bad2.tar Binary files differnew file mode 100644 index 0000000..d2c546b --- /dev/null +++ b/testsuite/test3_bad2.tar diff --git a/testsuite/test3_bad2.tar.lz b/testsuite/test3_bad2.tar.lz Binary files differnew file mode 100644 index 0000000..182c048 --- /dev/null +++ b/testsuite/test3_bad2.tar.lz diff --git a/testsuite/test3_bad3.tar b/testsuite/test3_bad3.tar Binary files differnew file mode 100644 index 0000000..7d78e87 --- /dev/null +++ b/testsuite/test3_bad3.tar diff --git a/testsuite/test3_bad3.tar.lz b/testsuite/test3_bad3.tar.lz Binary files differnew file mode 100644 index 0000000..3b46163 --- /dev/null +++ b/testsuite/test3_bad3.tar.lz diff --git a/testsuite/test3_bad4.tar b/testsuite/test3_bad4.tar Binary files differnew file mode 
100644 index 0000000..68312b3 --- /dev/null +++ b/testsuite/test3_bad4.tar diff --git a/testsuite/test3_bad4.tar.lz b/testsuite/test3_bad4.tar.lz Binary files differnew file mode 100644 index 0000000..7ac6d98 --- /dev/null +++ b/testsuite/test3_bad4.tar.lz diff --git a/testsuite/test3_bad5.tar b/testsuite/test3_bad5.tar Binary files differnew file mode 100644 index 0000000..e482969 --- /dev/null +++ b/testsuite/test3_bad5.tar diff --git a/testsuite/test3_bad5.tar.lz b/testsuite/test3_bad5.tar.lz Binary files differnew file mode 100644 index 0000000..5b4feb3 --- /dev/null +++ b/testsuite/test3_bad5.tar.lz diff --git a/testsuite/test3_bad6.tar.lz b/testsuite/test3_bad6.tar.lz Binary files differnew file mode 100644 index 0000000..42b3888 --- /dev/null +++ b/testsuite/test3_bad6.tar.lz diff --git a/testsuite/test3_dir.tar b/testsuite/test3_dir.tar Binary files differnew file mode 100644 index 0000000..e0c2b29 --- /dev/null +++ b/testsuite/test3_dir.tar diff --git a/testsuite/test3_dir.tar.lz b/testsuite/test3_dir.tar.lz Binary files differnew file mode 100644 index 0000000..8eb3f43 --- /dev/null +++ b/testsuite/test3_dir.tar.lz diff --git a/testsuite/test3_dot.tar.lz b/testsuite/test3_dot.tar.lz Binary files differnew file mode 100644 index 0000000..8fd3d1f --- /dev/null +++ b/testsuite/test3_dot.tar.lz diff --git a/testsuite/test3_em1.tar.lz b/testsuite/test3_em1.tar.lz Binary files differnew file mode 100644 index 0000000..0aa8724 --- /dev/null +++ b/testsuite/test3_em1.tar.lz diff --git a/testsuite/test3_em2.tar.lz b/testsuite/test3_em2.tar.lz Binary files differnew file mode 100644 index 0000000..4fe4e5d --- /dev/null +++ b/testsuite/test3_em2.tar.lz diff --git a/testsuite/test3_em3.tar.lz b/testsuite/test3_em3.tar.lz Binary files differnew file mode 100644 index 0000000..49e2eab --- /dev/null +++ b/testsuite/test3_em3.tar.lz diff --git a/testsuite/test3_em4.tar.lz b/testsuite/test3_em4.tar.lz Binary files differnew file mode 100644 index 0000000..95df508 --- 
/dev/null +++ b/testsuite/test3_em4.tar.lz diff --git a/testsuite/test3_em5.tar.lz b/testsuite/test3_em5.tar.lz Binary files differnew file mode 100644 index 0000000..706beb5 --- /dev/null +++ b/testsuite/test3_em5.tar.lz diff --git a/testsuite/test3_em6.tar.lz b/testsuite/test3_em6.tar.lz Binary files differnew file mode 100644 index 0000000..806884d --- /dev/null +++ b/testsuite/test3_em6.tar.lz diff --git a/testsuite/test3_eoa1.tar b/testsuite/test3_eoa1.tar Binary files differnew file mode 100644 index 0000000..175b807 --- /dev/null +++ b/testsuite/test3_eoa1.tar diff --git a/testsuite/test3_eoa1.tar.lz b/testsuite/test3_eoa1.tar.lz Binary files differnew file mode 100644 index 0000000..0eb86e4 --- /dev/null +++ b/testsuite/test3_eoa1.tar.lz diff --git a/testsuite/test3_eoa2.tar b/testsuite/test3_eoa2.tar Binary files differnew file mode 100644 index 0000000..458be1e --- /dev/null +++ b/testsuite/test3_eoa2.tar diff --git a/testsuite/test3_eoa2.tar.lz b/testsuite/test3_eoa2.tar.lz Binary files differnew file mode 100644 index 0000000..1f47953 --- /dev/null +++ b/testsuite/test3_eoa2.tar.lz diff --git a/testsuite/test3_eoa3.tar b/testsuite/test3_eoa3.tar Binary files differnew file mode 100644 index 0000000..3003a93 --- /dev/null +++ b/testsuite/test3_eoa3.tar diff --git a/testsuite/test3_eoa3.tar.lz b/testsuite/test3_eoa3.tar.lz Binary files differnew file mode 100644 index 0000000..20ba9f8 --- /dev/null +++ b/testsuite/test3_eoa3.tar.lz diff --git a/testsuite/test3_eoa4.tar b/testsuite/test3_eoa4.tar Binary files differnew file mode 100644 index 0000000..4012fea --- /dev/null +++ b/testsuite/test3_eoa4.tar diff --git a/testsuite/test3_eoa4.tar.lz b/testsuite/test3_eoa4.tar.lz Binary files differnew file mode 100644 index 0000000..1593feb --- /dev/null +++ b/testsuite/test3_eoa4.tar.lz diff --git a/testsuite/test3_eoa5.tar.lz b/testsuite/test3_eoa5.tar.lz Binary files differnew file mode 100644 index 0000000..156bd3a --- /dev/null +++ 
b/testsuite/test3_eoa5.tar.lz diff --git a/testsuite/test3_gh1.tar b/testsuite/test3_gh1.tar Binary files differnew file mode 100644 index 0000000..f969561 --- /dev/null +++ b/testsuite/test3_gh1.tar diff --git a/testsuite/test3_gh1.tar.lz b/testsuite/test3_gh1.tar.lz Binary files differnew file mode 100644 index 0000000..d38f46b --- /dev/null +++ b/testsuite/test3_gh1.tar.lz diff --git a/testsuite/test3_gh2.tar b/testsuite/test3_gh2.tar Binary files differnew file mode 100644 index 0000000..f5f0c31 --- /dev/null +++ b/testsuite/test3_gh2.tar diff --git a/testsuite/test3_gh2.tar.lz b/testsuite/test3_gh2.tar.lz Binary files differnew file mode 100644 index 0000000..48f18dd --- /dev/null +++ b/testsuite/test3_gh2.tar.lz diff --git a/testsuite/test3_gh3.tar b/testsuite/test3_gh3.tar Binary files differnew file mode 100644 index 0000000..e0d3a9d --- /dev/null +++ b/testsuite/test3_gh3.tar diff --git a/testsuite/test3_gh3.tar.lz b/testsuite/test3_gh3.tar.lz Binary files differnew file mode 100644 index 0000000..89a31a6 --- /dev/null +++ b/testsuite/test3_gh3.tar.lz diff --git a/testsuite/test3_gh4.tar b/testsuite/test3_gh4.tar Binary files differnew file mode 100644 index 0000000..0655c31 --- /dev/null +++ b/testsuite/test3_gh4.tar diff --git a/testsuite/test3_gh4.tar.lz b/testsuite/test3_gh4.tar.lz Binary files differnew file mode 100644 index 0000000..5b9f605 --- /dev/null +++ b/testsuite/test3_gh4.tar.lz diff --git a/testsuite/test3_gh5.tar.lz b/testsuite/test3_gh5.tar.lz Binary files differnew file mode 100644 index 0000000..b8f4abe --- /dev/null +++ b/testsuite/test3_gh5.tar.lz diff --git a/testsuite/test3_gh6.tar.lz b/testsuite/test3_gh6.tar.lz Binary files differnew file mode 100644 index 0000000..7be9aca --- /dev/null +++ b/testsuite/test3_gh6.tar.lz diff --git a/testsuite/test3_nn.tar b/testsuite/test3_nn.tar Binary files differnew file mode 100644 index 0000000..c738dee --- /dev/null +++ b/testsuite/test3_nn.tar diff --git a/testsuite/test3_nn.tar.lz 
b/testsuite/test3_nn.tar.lz Binary files differnew file mode 100644 index 0000000..8f78c1b --- /dev/null +++ b/testsuite/test3_nn.tar.lz diff --git a/testsuite/test3_sm1.tar.lz b/testsuite/test3_sm1.tar.lz Binary files differnew file mode 100644 index 0000000..6eb3947 --- /dev/null +++ b/testsuite/test3_sm1.tar.lz diff --git a/testsuite/test3_sm2.tar.lz b/testsuite/test3_sm2.tar.lz Binary files differnew file mode 100644 index 0000000..f312fcb --- /dev/null +++ b/testsuite/test3_sm2.tar.lz diff --git a/testsuite/test3_sm3.tar.lz b/testsuite/test3_sm3.tar.lz Binary files differnew file mode 100644 index 0000000..82ceb18 --- /dev/null +++ b/testsuite/test3_sm3.tar.lz diff --git a/testsuite/test3_sm4.tar.lz b/testsuite/test3_sm4.tar.lz Binary files differnew file mode 100644 index 0000000..601a640 --- /dev/null +++ b/testsuite/test3_sm4.tar.lz diff --git a/testsuite/test_bad1.txt b/testsuite/test_bad1.txt new file mode 100644 index 0000000..f8463f4 --- /dev/null +++ b/testsuite/test_bad1.txt @@ -0,0 +1,307 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program
\ No newline at end of file diff --git a/testsuite/test_bad1.txt.tar b/testsuite/test_bad1.txt.tar Binary files differnew file mode 100644 index 0000000..dc2140e --- /dev/null +++ b/testsuite/test_bad1.txt.tar diff --git a/testsuite/test_bad1.txt.tar.lz b/testsuite/test_bad1.txt.tar.lz Binary files differnew file mode 100644 index 0000000..afb1e85 --- /dev/null +++ b/testsuite/test_bad1.txt.tar.lz diff --git a/testsuite/test_bad2.txt b/testsuite/test_bad2.txt new file mode 100644 index 0000000..452408f --- /dev/null +++ b/testsuite/test_bad2.txt @@ -0,0 +1,320 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. 
+ + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. Ifodifnperived from the Progr"work based on therogrdifneneraeuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT X FR TO NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT X FR TO NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARQIS NO WARRATHERE IS NO WARRANTY +FOR THE + + This ee
\ No newline at end of file diff --git a/testsuite/test_bad2.txt.tar.lz b/testsuite/test_bad2.txt.tar.lz Binary files differnew file mode 100644 index 0000000..598e121 --- /dev/null +++ b/testsuite/test_bad2.txt.tar.lz diff --git a/testsuite/tlz_in_tar1.tar b/testsuite/tlz_in_tar1.tar Binary files differnew file mode 100644 index 0000000..f2dfd6c --- /dev/null +++ b/testsuite/tlz_in_tar1.tar diff --git a/testsuite/tlz_in_tar2.tar b/testsuite/tlz_in_tar2.tar Binary files differnew file mode 100644 index 0000000..be860c6 --- /dev/null +++ b/testsuite/tlz_in_tar2.tar diff --git a/testsuite/ts_in_link.tar.lz b/testsuite/ts_in_link.tar.lz Binary files differnew file mode 100644 index 0000000..dff816c --- /dev/null +++ b/testsuite/ts_in_link.tar.lz diff --git a/testsuite/ug32767.tar.lz b/testsuite/ug32767.tar.lz Binary files differnew file mode 100644 index 0000000..499dc6d --- /dev/null +++ b/testsuite/ug32767.tar.lz diff --git a/testsuite/ug32chars.tar.lz b/testsuite/ug32chars.tar.lz Binary files differnew file mode 100644 index 0000000..6c4da26 --- /dev/null +++ b/testsuite/ug32chars.tar.lz |