From 4caff76ff707b2d82726ef94872c3597fd62a4ae Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 16:27:09 +0200 Subject: Adding upstream version 0.23. Signed-off-by: Daniel Baumann --- AUTHORS | 1 + COPYING | 338 ++++++++++ ChangeLog | 215 ++++++ INSTALL | 80 +++ Makefile.in | 174 +++++ NEWS | 53 ++ README | 97 +++ archive_reader.cc | 273 ++++++++ archive_reader.h | 120 ++++ arg_parser.cc | 197 ++++++ arg_parser.h | 110 ++++ common.cc | 151 +++++ common_decode.cc | 243 +++++++ compress.cc | 375 +++++++++++ configure | 200 ++++++ create.cc | 788 ++++++++++++++++++++++ create.h | 48 ++ create_lz.cc | 600 +++++++++++++++++ decode.cc | 492 ++++++++++++++ decode.h | 32 + decode_lz.cc | 763 ++++++++++++++++++++++ delete.cc | 190 ++++++ delete_lz.cc | 139 ++++ doc/tarlz.1 | 177 +++++ doc/tarlz.info | 1272 ++++++++++++++++++++++++++++++++++++ doc/tarlz.texi | 1338 +++++++++++++++++++++++++++++++++++++ exclude.cc | 54 ++ extended.cc | 415 ++++++++++++ lzip_index.cc | 221 +++++++ lzip_index.h | 93 +++ main.cc | 723 ++++++++++++++++++++ tarlz.h | 609 +++++++++++++++++ testsuite/check.sh | 1417 ++++++++++++++++++++++++++++++++++++++++ testsuite/dotdot1.tar.lz | Bin 0 -> 139 bytes testsuite/dotdot2.tar.lz | Bin 0 -> 140 bytes testsuite/dotdot3.tar.lz | Bin 0 -> 141 bytes testsuite/dotdot4.tar.lz | Bin 0 -> 140 bytes testsuite/dotdot5.tar.lz | Bin 0 -> 139 bytes testsuite/eoa_blocks.tar | Bin 0 -> 1024 bytes testsuite/eoa_blocks.tar.lz | Bin 0 -> 44 bytes testsuite/rbar | 1 + testsuite/rbaz | 1 + testsuite/rfoo | 1 + testsuite/t155.tar | Bin 0 -> 9216 bytes testsuite/t155.tar.lz | Bin 0 -> 906 bytes testsuite/t155_fv1.tar | Bin 0 -> 10240 bytes testsuite/t155_fv1.tar.lz | Bin 0 -> 914 bytes testsuite/t155_fv2.tar | Bin 0 -> 10240 bytes testsuite/t155_fv2.tar.lz | Bin 0 -> 1042 bytes testsuite/t155_fv3.tar | Bin 0 -> 10240 bytes testsuite/t155_fv3.tar.lz | Bin 0 -> 915 bytes testsuite/t155_fv4.tar.lz | Bin 0 -> 1031 bytes testsuite/t155_fv5.tar.lz | Bin 0 -> 1173 bytes testsuite/t155_fv6.tar.lz | Bin 0 -> 1031 bytes testsuite/tar_in_tlz1.tar.lz | Bin 0 -> 7680 bytes testsuite/tar_in_tlz2.tar.lz | Bin 0 -> 7807 bytes testsuite/test.txt | 676 +++++++++++++++++++ testsuite/test.txt.lz | Bin 0 -> 7392 bytes testsuite/test.txt.tar | Bin 0 -> 38400 bytes testsuite/test.txt.tar.lz | Bin 0 -> 7495 bytes testsuite/test3.tar | Bin 0 -> 4096 bytes testsuite/test3.tar.lz | Bin 0 -> 356 bytes testsuite/test3_bad1.tar | Bin 0 -> 4096 bytes testsuite/test3_bad1.tar.lz | Bin 0 -> 356 bytes testsuite/test3_bad2.tar | Bin 0 -> 4096 bytes testsuite/test3_bad2.tar.lz | Bin 0 -> 356 bytes testsuite/test3_bad3.tar | Bin 0 -> 4096 bytes testsuite/test3_bad3.tar.lz | Bin 0 -> 356 bytes testsuite/test3_bad4.tar | Bin 0 -> 4096 bytes testsuite/test3_bad4.tar.lz | Bin 0 -> 356 bytes testsuite/test3_bad5.tar | Bin 0 -> 4608 bytes testsuite/test3_bad5.tar.lz | Bin 0 -> 356 bytes testsuite/test3_bad6.tar.lz | Bin 0 -> 866 bytes testsuite/test3_dir.tar | Bin 0 -> 4096 bytes testsuite/test3_dir.tar.lz | Bin 0 -> 358 bytes testsuite/test3_dot.tar.lz | Bin 0 -> 1126 bytes testsuite/test3_em1.tar.lz | Bin 0 -> 392 bytes testsuite/test3_em2.tar.lz | Bin 0 -> 392 bytes testsuite/test3_em3.tar.lz | Bin 0 -> 392 bytes testsuite/test3_em4.tar.lz | Bin 0 -> 392 bytes testsuite/test3_em5.tar.lz | Bin 0 -> 392 bytes testsuite/test3_em6.tar.lz | Bin 0 -> 500 bytes testsuite/test3_eoa1.tar | Bin 0 -> 3072 bytes testsuite/test3_eoa1.tar.lz | Bin 0 -> 312 bytes testsuite/test3_eoa2.tar | Bin 0 -> 3584 bytes testsuite/test3_eoa2.tar.lz | Bin 0 -> 352 bytes testsuite/test3_eoa3.tar | Bin 0 -> 4608 bytes testsuite/test3_eoa3.tar.lz | Bin 0 -> 396 bytes testsuite/test3_eoa4.tar | Bin 0 -> 4096 bytes testsuite/test3_eoa4.tar.lz | Bin 0 -> 535 bytes testsuite/test3_eoa5.tar.lz | Bin 0 -> 535 bytes testsuite/test3_gh1.tar | Bin 0 -> 5120 bytes testsuite/test3_gh1.tar.lz | Bin 0 -> 574 bytes testsuite/test3_gh2.tar | Bin 0 -> 5120 bytes testsuite/test3_gh2.tar.lz | Bin 0 -> 607 bytes testsuite/test3_gh3.tar | Bin 0 -> 5120 bytes testsuite/test3_gh3.tar.lz | Bin 0 -> 645 bytes testsuite/test3_gh4.tar | Bin 0 -> 5120 bytes testsuite/test3_gh4.tar.lz | Bin 0 -> 795 bytes testsuite/test3_gh5.tar.lz | Bin 0 -> 574 bytes testsuite/test3_gh6.tar.lz | Bin 0 -> 521 bytes testsuite/test3_nn.tar | Bin 0 -> 4096 bytes testsuite/test3_nn.tar.lz | Bin 0 -> 350 bytes testsuite/test3_sm1.tar.lz | Bin 0 -> 579 bytes testsuite/test3_sm2.tar.lz | Bin 0 -> 612 bytes testsuite/test3_sm3.tar.lz | Bin 0 -> 650 bytes testsuite/test3_sm4.tar.lz | Bin 0 -> 798 bytes testsuite/test_bad1.txt | 307 +++++++++ testsuite/test_bad1.txt.tar | Bin 0 -> 17014 bytes testsuite/test_bad1.txt.tar.lz | Bin 0 -> 6000 bytes testsuite/test_bad2.txt | 320 +++++++++ testsuite/test_bad2.txt.tar.lz | Bin 0 -> 7495 bytes testsuite/tlz_in_tar1.tar | Bin 0 -> 2048 bytes testsuite/tlz_in_tar2.tar | Bin 0 -> 3072 bytes testsuite/ts_in_link.tar.lz | Bin 0 -> 509 bytes testsuite/ug32767.tar.lz | Bin 0 -> 136 bytes testsuite/ug32chars.tar.lz | Bin 0 -> 176 bytes 117 files changed, 13304 insertions(+) create mode 100644 AUTHORS create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 INSTALL create mode 100644 Makefile.in create mode 100644 NEWS create mode 100644 README create mode 100644 archive_reader.cc create mode 100644 archive_reader.h create mode 100644 arg_parser.cc create mode 100644 arg_parser.h create mode 100644 common.cc create mode 100644 common_decode.cc create mode 100644 compress.cc create mode 100755 configure create mode 100644 create.cc create mode 100644 create.h create mode 100644 create_lz.cc create mode 100644 decode.cc create mode 100644 decode.h create mode 100644 decode_lz.cc create mode 100644 delete.cc create mode 100644 delete_lz.cc create mode 100644 doc/tarlz.1 create mode 100644 doc/tarlz.info create mode 100644 doc/tarlz.texi create mode 100644 exclude.cc create mode 100644 extended.cc create mode 100644 lzip_index.cc create mode 100644 lzip_index.h create mode 100644 main.cc create mode 100644 tarlz.h create mode 100755 testsuite/check.sh create mode 100644 testsuite/dotdot1.tar.lz create mode 100644 testsuite/dotdot2.tar.lz create mode 100644 testsuite/dotdot3.tar.lz create mode 100644 testsuite/dotdot4.tar.lz create mode 100644 testsuite/dotdot5.tar.lz create mode 100644 testsuite/eoa_blocks.tar create mode 100644 testsuite/eoa_blocks.tar.lz create mode 100644 testsuite/rbar create mode 100644 testsuite/rbaz create mode 100644 testsuite/rfoo create mode 100644 testsuite/t155.tar create mode 100644 testsuite/t155.tar.lz create mode 100644 testsuite/t155_fv1.tar create mode 100644 testsuite/t155_fv1.tar.lz create mode 100644 testsuite/t155_fv2.tar create mode 100644 testsuite/t155_fv2.tar.lz create mode 100644 testsuite/t155_fv3.tar create mode 100644 testsuite/t155_fv3.tar.lz create mode 100644 testsuite/t155_fv4.tar.lz create mode 100644 testsuite/t155_fv5.tar.lz create mode 100644 testsuite/t155_fv6.tar.lz create mode 100644 testsuite/tar_in_tlz1.tar.lz create mode 100644 testsuite/tar_in_tlz2.tar.lz create mode 100644 testsuite/test.txt create mode 100644 testsuite/test.txt.lz create mode 100644 testsuite/test.txt.tar create mode 100644 testsuite/test.txt.tar.lz create mode 100644 testsuite/test3.tar create mode 100644 testsuite/test3.tar.lz create mode 100644 testsuite/test3_bad1.tar create mode 100644 testsuite/test3_bad1.tar.lz create mode 100644 testsuite/test3_bad2.tar create mode 100644 testsuite/test3_bad2.tar.lz create mode 100644 testsuite/test3_bad3.tar create mode 100644 testsuite/test3_bad3.tar.lz create mode 100644 testsuite/test3_bad4.tar create mode 100644 testsuite/test3_bad4.tar.lz create mode 100644 testsuite/test3_bad5.tar create mode 100644 testsuite/test3_bad5.tar.lz create mode 100644 testsuite/test3_bad6.tar.lz create mode 100644 testsuite/test3_dir.tar create mode 100644 testsuite/test3_dir.tar.lz create mode 100644 testsuite/test3_dot.tar.lz create mode 100644 testsuite/test3_em1.tar.lz create mode 100644 testsuite/test3_em2.tar.lz create mode 100644 testsuite/test3_em3.tar.lz create mode 100644 testsuite/test3_em4.tar.lz create mode 100644 testsuite/test3_em5.tar.lz create mode 100644 testsuite/test3_em6.tar.lz create mode 100644 testsuite/test3_eoa1.tar create mode 100644 testsuite/test3_eoa1.tar.lz create mode 100644 testsuite/test3_eoa2.tar create mode 100644 testsuite/test3_eoa2.tar.lz create mode 100644 testsuite/test3_eoa3.tar create mode 100644 testsuite/test3_eoa3.tar.lz create mode 100644 testsuite/test3_eoa4.tar create mode 100644 testsuite/test3_eoa4.tar.lz create mode 100644 testsuite/test3_eoa5.tar.lz create mode 100644 testsuite/test3_gh1.tar create mode 100644 testsuite/test3_gh1.tar.lz create mode 100644 testsuite/test3_gh2.tar create mode 100644 testsuite/test3_gh2.tar.lz create mode 100644 testsuite/test3_gh3.tar create mode 100644 testsuite/test3_gh3.tar.lz create mode 100644 testsuite/test3_gh4.tar create mode 100644 testsuite/test3_gh4.tar.lz create mode 100644 testsuite/test3_gh5.tar.lz create mode 100644 testsuite/test3_gh6.tar.lz create mode 100644 testsuite/test3_nn.tar create mode 100644 testsuite/test3_nn.tar.lz create mode 100644 testsuite/test3_sm1.tar.lz create mode 100644 testsuite/test3_sm2.tar.lz create mode 100644 testsuite/test3_sm3.tar.lz create mode 100644 testsuite/test3_sm4.tar.lz create mode 100644 testsuite/test_bad1.txt create mode 100644 testsuite/test_bad1.txt.tar create mode 100644 testsuite/test_bad1.txt.tar.lz create mode 100644 testsuite/test_bad2.txt create mode 100644 testsuite/test_bad2.txt.tar.lz create mode 100644 testsuite/tlz_in_tar1.tar create mode 100644 testsuite/tlz_in_tar2.tar create mode 100644 testsuite/ts_in_link.tar.lz create mode 100644 testsuite/ug32767.tar.lz create mode 100644 testsuite/ug32chars.tar.lz diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..a8fcb34 --- /dev/null +++ b/AUTHORS @@ -0,0 +1 @@ +Tarlz was written by Antonio Diaz Diaz. diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..4ad17ae --- /dev/null +++ b/COPYING @@ -0,0 +1,338 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..deba87a --- /dev/null +++ b/ChangeLog @@ -0,0 +1,215 @@ +2022-09-23 Antonio Diaz Diaz + + * Version 0.23 released. + * Create and decode the extended records 'atime' and 'mtime'. + * Create and decode the extended records 'uid' and 'gid'. + * New option '--ignore-overflow'. + * Refuse to read/write archive data from/to a terminal. + (Reported by DustDFG). + * main.cc (parse_mtime): Make time of day 'HH:MM:SS' optional. + Accept both space and 'T' as separator between date and time. + (show_option_error): New function showing argument and option name. + * decode.cc (extract_member): Diagnose intermediate directory failure. + Failure to extract a member is no longer fatal. + * decode_lz.cc: Make diagnostics identical to serial decoder. + * common_decode.cc (format_member_name): Improve column alignment. + * create.cc (fill_headers): Improve diagnostic when stat reports a + wrong st_size for a symbolic link. (Reported by Jason Lenz). + Change diagnostic "File is the archive" to "Archive can't contain + itself" following a similar change made by Paul Eggert to GNU tar. + * Don't show "Removing leading '/' from member names." if excluded. + * tarlz.texi: Change GNU Texinfo category from 'Data Compression' + to 'Archiving' to match that of GNU tar. + Use 'end-of-archive' (EOA) instead of 'end-of-file' (EOF). + * main.cc (show_help), tarlz.texi: List operations before options. + * Many small improvements have been made to code and documentation. + +2022-01-05 Antonio Diaz Diaz + + * Version 0.22 released. + * main.cc (getnum): Show option name and valid range if error. + (check_lib): Check that LZ_API_VERSION and LZ_version_string match. + (main): Report an error if -o is used with any operation except -z. + * Set variable LIBS from configure. + +2021-06-14 Antonio Diaz Diaz + + * Version 0.21 released. + * Lzlib 1.12 or newer is now required. + * decode.cc (decode): Skip members without name except when listing. + decode_lz.cc (dworker): Likewise. (Reported by Florian Schmaus). + * New options '-z, --compress' and '-o, --output'. + * New option '--warn-newer'. + * tarlz.texi (Portable character set): Link to moe section on Unicode. + (Invoking tarlz): Document concatenation to standard output. + * check.sh: Fix the '--diff' test on OS/2. + +2021-01-08 Antonio Diaz Diaz + + * Version 0.19 released. + * extended.cc: Print a diagnostic for each unknown keyword found. + * tarlz.h: Add a missing '#include '. + +2020-11-21 Antonio Diaz Diaz + + * Version 0.18 released. + * main.cc: New option '--check-lib'. + * Implement multi-threaded '-x, --extract'. + * Don't #include when compiling on OS2. + * delete.cc, delete_lz.cc: Use Archive_reader. + * extract.cc: Rename to decode.cc. + * tarlz.texi: New section 'Limitations of multi-threaded extraction'. + +2020-07-30 Antonio Diaz Diaz + + * Version 0.17 released. + * New option '--mtime'. + * New option '-p, --preserve-permissions'. + * Implement multi-threaded '-d, --diff'. + * list_lz.cc: Rename to decode_lz.cc. + * main.cc (main): Report an error if a file name is empty or if the + archive is specified more than once. + * lzip_index.cc: Improve messages for corruption in last header. + * Don't #include when compiling on BSD. + * tarlz.texi: New chapter 'Internal structure of tarlz'. + +2019-10-08 Antonio Diaz Diaz + + * Version 0.16 released. + * extract.cc (extract_member): Fix call order of chown, chmod. + * delete_lz.cc (delete_members_lz): Return 2 if collective member. + * main.cc: Set a valid invocation_name even if argc == 0. + * #include unconditionally. + * tarlz.texi: New chapter 'Portable character set'. + +2019-04-11 Antonio Diaz Diaz + + * Version 0.15 released. + * New option '--delete' (from uncompressed and --no-solid archives). + * list_lz.cc: Fix MT listing of archives with format violations. + +2019-03-12 Antonio Diaz Diaz + + * Version 0.14 released. + * New option '--exclude'. + * New option '-h, --dereference'. + * Short option name '-h' no longer means '--help'. + * create.cc: Implement '-A, --concatenate' and '-r, --append' to + uncompressed archives and to standard output. + * main.cc: Port option '--out-slots' from plzip. + +2019-02-27 Antonio Diaz Diaz + + * Version 0.13 released. + * create_lz.cc (cworker): Fix skipping of unreadable files. + * list_lz.cc: Fix listing of archives containing empty lzip members. + * create.cc (fill_headers): Store negative mtime as cero. + +2019-02-22 Antonio Diaz Diaz + + * Version 0.12 released. + * create.cc (fill_headers): Fix use of st_rdev instead of st_dev. + * Save just numerical uid/gid if user or group not in database. + * extract.cc (format_member_name): Print devmajor and devminor. + * New option '-d, --diff'. + * New option '--ignore-ids'. + * extract.cc: Fast '-t, --list' on seekable uncompressed archives. + +2019-02-13 Antonio Diaz Diaz + + * Version 0.11 released. + * extract.cc (archive_read): Fix endless loop with empty lz file. + * Implement multi-threaded '-c, --create' and '-r, --append'. + * '--bsolid' is now the default compression granularity. + * create.cc (remove_leading_dotslash): Remember more than one prefix. + * tarlz.texi: New chapter 'Minimum archive sizes'. + +2019-01-31 Antonio Diaz Diaz + + * Version 0.10 released. + * New option '--bsolid'. + * New option '-B, --data-size'. + * create.cc: Set ustar name to zero if extended header is used. + +2019-01-22 Antonio Diaz Diaz + + * Version 0.9 released. + * Implement multi-threaded '-t, --list'. + * New option '-n, --threads'. + * Recognize global pax headers. Ignore them for now. + * strtoul has been replaced with length-safe parsers. + * tarlz.texi: New chapter 'Limitations of parallel tar decoding'. + +2018-12-16 Antonio Diaz Diaz + + * Version 0.8 released. + * New option '--anonymous' (--owner=root --group=root). + * extract.cc (decode): 'tarlz -xf foo ./bar' now extracts 'bar'. + * create.cc: Set to zero most fields in extended headers. + * tarlz.texi: New chapter 'Amendments to pax format'. + +2018-11-23 Antonio Diaz Diaz + + * Version 0.7 released. + * New option '--keep-damaged'. + * New option '--no-solid'. + * create.cc (archive_write): Minimize dictionary size. + * create.cc: Detect and skip archive in '-A', '-c', and '-r'. + * main.cc (show_version): Show the version of lzlib being used. + +2018-10-19 Antonio Diaz Diaz + + * Version 0.6 released. + * New option '-A, --concatenate'. + * Option '--ignore-crc' replaced with '--missing-crc'. + * create.cc (add_member): Verify that uid, gid, mtime, devmajor, + and devminor are in ustar range. + * configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'. + * Makefile.in: Use tarlz in target 'dist'. + +2018-09-29 Antonio Diaz Diaz + + * Version 0.5 released. + * Implement simplified POSIX pax format. + * Implement CRC32-C (Castagnoli) of the extended header data. + * New option '--ignore-crc'. + * Add missing #includes for major, minor and makedev. + * tarlz.texi: Document the new archive format. + +2018-04-23 Antonio Diaz Diaz + + * Version 0.4 released. + * Add some missing #includes. + * main.cc: Open files in binary mode on OS2. + +2018-03-19 Antonio Diaz Diaz + + * Version 0.3 released. + * Rename project to 'tarlz' from 'pmtar' (Poor Man's Tar). + * New option '-C, --directory'. + * Implement lzip compression of members at archive creation. + * New option '-r, --append'. + * New options '--owner' and '--group'. + * New options '--asolid', '--dsolid', and '--solid'. + * Implement file appending to compressed archive. + * Implement transparent decompression of the archive. + * Implement skipping over damaged (un)compressed members. + * Implement recursive extraction/listing of directories. + * Implement verbose extract/list output. + * tarlz.texi: New file. + +2014-01-22 Antonio Diaz Diaz + + * Version 0.2 released. + * configure: Options now accept a separate argument. + +2013-02-16 Antonio Diaz Diaz + + * Version 0.1 released. + + +Copyright (C) 2013-2022 Antonio Diaz Diaz. + +This file is a collection of facts, and thus it is not copyrightable, +but just in case, you have unlimited permission to copy, distribute, and +modify it. diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..6fd5709 --- /dev/null +++ b/INSTALL @@ -0,0 +1,80 @@ +Requirements +------------ +You will need a C++98 compiler with support for 'long long', and the +compression library lzlib installed. (gcc 3.3.6 or newer is recommended). +I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards +compliant compiler. + +Lzlib must be version 1.12 or newer. + +Gcc is available at http://gcc.gnu.org. +Lzlib is available at http://www.nongnu.org/lzip/lzlib.html. + +The operating system must allow signal handlers read access to objects with +static storage duration so that the cleanup handler for Control-C can delete +the partial output file in '-z, --compress' mode. + + +Procedure +--------- +1. Unpack the archive if you have not done so already: + + tar -xf tarlz[version].tar.lz +or + lzip -cd tarlz[version].tar.lz | tar -xf - + +This creates the directory ./tarlz[version] containing the source from +the main archive. + +2. Change to tarlz directory and run configure. + (Try 'configure --help' for usage instructions). + + cd tarlz[version] + ./configure + + To link against a lzlib not installed in a standard place, use: + + ./configure CPPFLAGS='-I ' LDFLAGS='-L ' + + (Replace with the directory containing the file lzlib.h, + and with the directory containing the file liblz.a). + +3. Run make. + + make + +4. Optionally, type 'make check' to run the tests that come with tarlz. + +5. Type 'make install' to install the program and any data files and + documentation. + + Or type 'make install-compress', which additionally compresses the + info manual and the man page after installation. + (Installing compressed docs may become the default in the future). + + You can install only the program, the info manual, or the man page by + typing 'make install-bin', 'make install-info', or 'make install-man' + respectively. + + +Another way +----------- +You can also compile tarlz into a separate directory. +To do this, you must use a version of 'make' that supports the variable +'VPATH', such as GNU 'make'. 'cd' to the directory where you want the +object files and executables to go and run the 'configure' script. +'configure' automatically checks for the source code in '.', in '..', and +in the directory that 'configure' is in. + +'configure' recognizes the option '--srcdir=DIR' to control where to +look for the sources. Usually 'configure' can determine that directory +automatically. + +After running 'configure', you can run 'make' and 'make install' as +explained above. + + +Copyright (C) 2013-2022 Antonio Diaz Diaz. + +This file is free documentation: you have unlimited permission to copy, +distribute, and modify it. diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..0fa5761 --- /dev/null +++ b/Makefile.in @@ -0,0 +1,174 @@ + +DISTNAME = $(pkgname)-$(pkgversion) +INSTALL = install +INSTALL_PROGRAM = $(INSTALL) -m 755 +INSTALL_DATA = $(INSTALL) -m 644 +INSTALL_DIR = $(INSTALL) -d -m 755 +SHELL = /bin/sh +CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1 + +objs = arg_parser.o lzip_index.o archive_reader.o common.o common_decode.o \ + compress.o create.o create_lz.o decode.o decode_lz.o delete.o \ + delete_lz.o exclude.o extended.o main.o + + +.PHONY : all install install-bin install-info install-man \ + install-strip install-compress install-strip-compress \ + install-bin-strip install-info-compress install-man-compress \ + uninstall uninstall-bin uninstall-info uninstall-man \ + doc info man check dist clean distclean + +all : $(progname) + +$(progname) : $(objs) + $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) $(LIBS) + +main.o : main.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $< + +%.o : %.cc + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $< + +$(objs) : Makefile +arg_parser.o : arg_parser.h +archive_reader.o : tarlz.h lzip_index.h archive_reader.h +common.o : tarlz.h arg_parser.h +common_decode.o : tarlz.h arg_parser.h +compress.o : tarlz.h arg_parser.h +create.o : tarlz.h arg_parser.h create.h +create_lz.o : tarlz.h arg_parser.h create.h +decode.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h decode.h +decode_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h decode.h +delete.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h +delete_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h +exclude.o : tarlz.h +extended.o : tarlz.h +lzip_index.o : tarlz.h lzip_index.h +main.o : tarlz.h arg_parser.h + + +doc : info man + +info : $(VPATH)/doc/$(pkgname).info + +$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi + cd $(VPATH)/doc && makeinfo $(pkgname).texi + +man : $(VPATH)/doc/$(progname).1 + +$(VPATH)/doc/$(progname).1 : $(progname) + help2man -n 'creates tar archives with multimember lzip compression' \ + -o $@ ./$(progname) + +Makefile : $(VPATH)/configure $(VPATH)/Makefile.in + ./config.status + +check : all + @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion) + +install : install-bin install-info install-man +install-strip : install-bin-strip install-info install-man +install-compress : install-bin install-info-compress install-man-compress +install-strip-compress : install-bin-strip install-info-compress install-man-compress + +install-bin : all + if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi + $(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)" + +install-bin-strip : all + $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin + +install-info : + if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* + $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info" + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi + +install-info-compress : install-info + lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info" + +install-man : + if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi + -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"* + $(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1" + +install-man-compress : install-man + lzip -v -9 "$(DESTDIR)$(mandir)/man1/$(progname).1" + +uninstall : uninstall-man uninstall-info uninstall-bin + +uninstall-bin : + -rm -f "$(DESTDIR)$(bindir)/$(progname)" + +uninstall-info : + -if $(CAN_RUN_INSTALLINFO) ; then \ + install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \ + fi + -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"* + +uninstall-man : + -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"* + +dist : doc + ln -sf $(VPATH) $(DISTNAME) + tarlz --solid --anonymous -9cvf $(DISTNAME).tar.lz \ + $(DISTNAME)/AUTHORS \ + $(DISTNAME)/COPYING \ + $(DISTNAME)/ChangeLog \ + $(DISTNAME)/INSTALL \ + $(DISTNAME)/Makefile.in \ + $(DISTNAME)/NEWS \ + $(DISTNAME)/README \ + $(DISTNAME)/configure \ + $(DISTNAME)/doc/$(progname).1 \ + $(DISTNAME)/doc/$(pkgname).info \ + $(DISTNAME)/doc/$(pkgname).texi \ + $(DISTNAME)/*.h \ + $(DISTNAME)/*.cc \ + $(DISTNAME)/testsuite/check.sh \ + $(DISTNAME)/testsuite/test.txt \ + $(DISTNAME)/testsuite/test.txt.tar \ + $(DISTNAME)/testsuite/test_bad1.txt.tar \ + $(DISTNAME)/testsuite/test_bad[12].txt \ + $(DISTNAME)/testsuite/rfoo \ + $(DISTNAME)/testsuite/rbar \ + $(DISTNAME)/testsuite/rbaz \ + $(DISTNAME)/testsuite/test3.tar \ + $(DISTNAME)/testsuite/test3_nn.tar \ + $(DISTNAME)/testsuite/test3_eoa[1-4].tar \ + $(DISTNAME)/testsuite/test3_gh[1-4].tar \ + $(DISTNAME)/testsuite/test3_bad[1-5].tar \ + $(DISTNAME)/testsuite/test3_dir.tar \ + $(DISTNAME)/testsuite/t155.tar \ + $(DISTNAME)/testsuite/t155_fv[1-3].tar \ + $(DISTNAME)/testsuite/eoa_blocks.tar \ + $(DISTNAME)/testsuite/test.txt.lz \ + $(DISTNAME)/testsuite/test.txt.tar.lz \ + $(DISTNAME)/testsuite/test_bad[12].txt.tar.lz \ + $(DISTNAME)/testsuite/test3.tar.lz \ + $(DISTNAME)/testsuite/test3_eoa[1-5].tar.lz \ + $(DISTNAME)/testsuite/test3_em[1-6].tar.lz \ + $(DISTNAME)/testsuite/test3_gh[1-6].tar.lz \ + $(DISTNAME)/testsuite/test3_nn.tar.lz \ + $(DISTNAME)/testsuite/test3_sm[1-4].tar.lz \ + $(DISTNAME)/testsuite/test3_bad[1-6].tar.lz \ + $(DISTNAME)/testsuite/test3_dir.tar.lz \ + $(DISTNAME)/testsuite/test3_dot.tar.lz \ + $(DISTNAME)/testsuite/tar_in_tlz[12].tar.lz \ + $(DISTNAME)/testsuite/tlz_in_tar[12].tar \ + $(DISTNAME)/testsuite/ts_in_link.tar.lz \ + $(DISTNAME)/testsuite/t155.tar.lz \ + $(DISTNAME)/testsuite/t155_fv[1-6].tar.lz \ + $(DISTNAME)/testsuite/dotdot[1-5].tar.lz \ + $(DISTNAME)/testsuite/ug32767.tar.lz \ + $(DISTNAME)/testsuite/ug32chars.tar.lz \ + $(DISTNAME)/testsuite/eoa_blocks.tar.lz + rm -f $(DISTNAME) + +clean : + -rm -f $(progname) $(objs) + +distclean : clean + -rm -f Makefile config.status *.tar *.tar.lz diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..9d3f248 --- /dev/null +++ b/NEWS @@ -0,0 +1,53 @@ +Changes in version 0.23: + +Tarlz now can create and decode the extended records 'atime' and 'mtime', +allowing times beyond the ustar range (before 1970-01-01 00:00:00 UTC or +after 2242-03-16 12:56:31 UTC). + +Tarlz now can create and decode the extended records 'uid' and 'gid', +allowing user and group IDs beyond the ustar limit of 2_097_151. + +The new option '--ignore-overflow', which makes '-d, --diff' ignore +differences in mtime caused by overflow on 32-bit systems, has been added. + +Tarlz now refuses to read archive data from a terminal or write archive data +to a terminal. (Reported by DustDFG). + +In the date format of option '--mtime' the time of day 'HH:MM:SS' is now +optional and defaults to '00:00:00'. Both space and 'T' are now accepted as +separator between date and time. + +Diagnostics caused by invalid arguments to command line options now show the +argument and the name of the option. + +Tarlz now diagnoses separately the failure to create an intermediate +directory during extraction. + +Failure to extract a member due to environmental problems is no longer fatal +in serial extraction. (It was already non-fatal in parallel extraction). + +The diagnostics emitted by the parallel decoder should now be identical to +the corresponding diagnostics of the serial decoder. + +Column alignment has been improved in listings by printing "user/group size" +in a field of minimum width 19 with at least 8 characters for size. + +The diagnostic shown when the filesystem reports a wrong st_size for a +symbolic link has been improved. (Reported by Jason Lenz). + +The diagnostic "File is the archive" has been changed to "Archive can't +contain itself" following a similar change made by Paul Eggert to GNU tar. + +The warning "Removing leading '/' from member names." is now not shown when +compressing nor if the member causing it is excluded. + +The texinfo category of the manual has been changed from 'Data Compression' +to 'Archiving' to match that of GNU tar. + +'end-of-archive' (EOA) is now used consistently to refer to the blocks of +binary zeros used to mark the end of the archive. + +Operations are now listed before options in the --help output and in the +manual. + +Many small improvements have been made to the code and documentation. diff --git a/README b/README new file mode 100644 index 0000000..dedf17a --- /dev/null +++ b/README @@ -0,0 +1,97 @@ +Description + +Tarlz is a massively parallel (multi-threaded) combined implementation of +the tar archiver and the lzip compressor. Tarlz uses the compression library +lzlib. + +Tarlz creates tar archives using a simplified and safer variant of the POSIX +pax format compressed in lzip format, keeping the alignment between tar +members and lzip members. The resulting multimember tar.lz archive is fully +backward compatible with standard tar tools like GNU tar, which treat it +like any other tar.lz archive. Tarlz can append files to the end of such +compressed archives. + +Keeping the alignment between tar members and lzip members has two +advantages. It adds an indexed lzip layer on top of the tar archive, making +it possible to decode the archive safely in parallel. It also minimizes the +amount of data lost in case of corruption. Compressing a tar archive with +plzip may even double the amount of files lost for each lzip member damaged +because it does not keep the members aligned. + +Tarlz can create tar archives with five levels of compression granularity: +per file (--no-solid), per block (--bsolid, default), per directory +(--dsolid), appendable solid (--asolid), and solid (--solid). It can also +create uncompressed tar archives. + +Of course, compressing each file (or each directory) individually can't +achieve a compression ratio as high as compressing solidly the whole tar +archive, but it has the following advantages: + + * The resulting multimember tar.lz archive can be decompressed in + parallel, multiplying the decompression speed. + + * New members can be appended to the archive (by removing the + end-of-archive member), and unwanted members can be deleted from the + archive. Just like an uncompressed tar archive. + + * It is a safe POSIX-style backup format. In case of corruption, tarlz + can extract all the undamaged members from the tar.lz archive, + skipping over the damaged members, just like the standard + (uncompressed) tar. Moreover, the option '--keep-damaged' can be used + to recover as much data as possible from each damaged member, and + lziprecover can be used to recover some of the damaged members. + + * A multimember tar.lz archive is usually smaller than the corresponding + solidly compressed tar.gz archive, except when individually + compressing files smaller than about 32 KiB. + +Note that the POSIX pax format has a serious flaw. The metadata stored in +pax extended records are not protected by any kind of check sequence. +Corruption in a long file name may cause the extraction of the file in the +wrong place without warning. Corruption in a large file size may cause the +truncation of the file or the appending of garbage to the file, both +followed by a spurious warning about a corrupt header far from the place of +the undetected corruption. + +Metadata like file name and file size must be always protected in an archive +format because of the adverse effects of undetected corruption in them, +potentially much worse that undetected corruption in the data. Even more so +in the case of pax because the amount of metadata it stores is potentially +large, making undetected corruption and archiver misbehavior more probable. + +Headers and metadata must be protected separately from data because the +integrity checking of lzip may not be able to detect the corruption before +the metadata has been used, for example, to create a new file in the wrong +place. + +Because of the above, tarlz protects the extended records with a Cyclic +Redundancy Check (CRC) in a way compatible with standard tar tools. + +Tarlz does not understand other tar formats like gnu, oldgnu, star or v7. +The command 'tarlz -tf archive.tar.lz > /dev/null' can be used to verify +that the format of the archive is compatible with tarlz. + +The diagram below shows the correspondence between each tar member (formed +by one or two headers plus optional data) in the tar archive and each lzip +member in the resulting multimember tar.lz archive, when per file +compression is used: + +tar ++========+======+=================+===============+========+======+========+ +| header | data | extended header | extended data | header | data | EOA | ++========+======+=================+===============+========+======+========+ + +tar.lz ++===============+=================================================+========+ +| member | member | member | ++===============+=================================================+========+ + + +Copyright (C) 2013-2022 Antonio Diaz Diaz. + +This file is free documentation: you have unlimited permission to copy, +distribute, and modify it. + +The file Makefile.in is a data file used by configure to produce the +Makefile. It has the same copyright owner and permissions that configure +itself. diff --git a/archive_reader.cc b/archive_reader.cc new file mode 100644 index 0000000..8ad315d --- /dev/null +++ b/archive_reader.cc @@ -0,0 +1,273 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include // for lzlib.h +#include +#include + +#include "tarlz.h" +#include "lzip_index.h" +#include "archive_reader.h" + + +namespace { + +const char * const rdaerr_msg = "Error reading archive"; + +/* Return the number of bytes really read. + If (value returned < size) and (errno == 0), means EOF was reached. +*/ +int preadblock( const int fd, uint8_t * const buf, const int size, + const long long pos ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = pread( fd, buf + sz, size - sz, pos + sz ); + if( n > 0 ) sz += n; + else if( n == 0 ) break; // EOF + else if( errno != EINTR ) break; + errno = 0; + } + return sz; + } + +int non_tty_infd( const std::string & archive_name, const char * const namep ) + { + int infd = archive_name.empty() ? STDIN_FILENO : open_instream( archive_name ); + if( infd >= 0 && isatty( infd ) ) // for example /dev/tty + { show_file_error( namep, archive_name.empty() ? + "I won't read archive data from a terminal (missing -f option?)" : + "I won't read archive data from a terminal." ); + close( infd ); infd = -1; } + return infd; + } + + +void xLZ_decompress_write( LZ_Decoder * const decoder, + const uint8_t * const buffer, const int size ) + { + if( LZ_decompress_write( decoder, buffer, size ) != size ) + internal_error( "library error (LZ_decompress_write)." ); + } + +} // end namespace + + +Archive_descriptor::Archive_descriptor( const std::string & archive_name ) + : name( archive_name ), namep( name.empty() ? "(stdin)" : name.c_str() ), + infd( non_tty_infd( archive_name, namep ) ), + lzip_index( infd, true, false ), + seekable( lseek( infd, 0, SEEK_SET ) == 0 ), + indexed( seekable && lzip_index.retval() == 0 ) {} + + +int Archive_reader_base::parse_records( Extended & extended, + const Tar_header header, + Resizable_buffer & rbuf, + const char * const default_msg, + const bool permissive ) + { + const long long edsize = parse_octal( header + size_o, size_l ); + const long long bufsize = round_up( edsize ); + if( edsize <= 0 ) return err( 2, misrec_msg ); // no extended records + if( edsize >= 1LL << 33 || bufsize >= INT_MAX ) + return err( -2, longrec_msg ); // records too long + if( !rbuf.resize( bufsize ) ) return err( -1, mem_msg ); + e_msg_ = ""; e_code_ = 0; + int retval = read( rbuf.u8(), bufsize ); // extended records buffer + if( retval == 0 && !extended.parse( rbuf(), edsize, permissive ) ) + retval = 2; + if( retval && !*e_msg_ ) e_msg_ = default_msg; + return retval; + } + + +/* Read 'size' uncompressed bytes, decompressing the input if needed. + Return value: 0 = OK, 1 = OOM or read error, 2 = EOF or invalid data. */ +int Archive_reader::read( uint8_t * const buf, const int size ) + { + if( first_read ) // check format + { + first_read = false; + uncompressed_seekable = ad.seekable && !ad.indexed && + ad.lzip_index.file_size() > 3 * header_size; + if( size != header_size ) + internal_error( "size != header_size on first call." ); + const int rd = readblock( ad.infd, buf, size ); + if( rd != size && errno ) return err( -1, rdaerr_msg, errno, rd ); + const Lzip_header & header = (*(const Lzip_header *)buf); + const bool islz = ( rd >= min_member_size && header.verify_magic() && + header.verify_version() && + isvalid_ds( header.dictionary_size() ) ); + const bool istar = ( rd == size && verify_ustar_chksum( buf ) ); + const bool iseoa = + ( !islz && !istar && rd == size && block_is_zero( buf, size ) ); + bool maybe_lz = islz; // maybe corrupt tar.lz + if( !islz && !istar && !iseoa ) // corrupt or invalid format + { + const bool lz_ext = has_lz_ext( ad.name ); + show_file_error( ad.namep, lz_ext ? posix_lz_msg : posix_msg ); + if( lz_ext && rd >= min_member_size ) maybe_lz = true; + else return err( 2 ); + } + if( !maybe_lz ) // uncompressed + { if( rd == size ) return 0; + return err( -2, "EOF reading archive.", 0, rd ); } + uncompressed_seekable = false; // compressed + decoder = LZ_decompress_open(); + if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) + { LZ_decompress_close( decoder ); decoder = 0; return err( -1, mem_msg ); } + xLZ_decompress_write( decoder, buf, rd ); + const int ret = read( buf, size ); if( ret != 0 ) return ret; + if( verify_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0; + return err( 2, islz ? posix_lz_msg : "" ); + } + + if( !decoder ) // uncompressed + { + const int rd = readblock( ad.infd, buf, size ); + if( rd == size ) return 0; else return err( -2, end_msg, 0, rd ); + } + const int ibuf_size = 16384; + uint8_t ibuf[ibuf_size]; + int sz = 0; + while( sz < size ) + { + const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); + if( rd < 0 ) + { + const unsigned long long old_pos = LZ_decompress_total_in_size( decoder ); + if( LZ_decompress_sync_to_member( decoder ) < 0 ) + internal_error( "library error (LZ_decompress_sync_to_member)." ); + e_skip_ = true; set_error_status( 2 ); + const unsigned long long new_pos = LZ_decompress_total_in_size( decoder ); + // lzlib < 1.8 does not update total_in_size when syncing to member + if( new_pos >= old_pos && new_pos < LLONG_MAX ) + return err( 2, "", 0, sz, true ); + return err( -1, "Skipping to next header failed. " + "Lzlib 1.8 or newer required.", 0, sz ); + } + if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) + { return err( -2, end_msg, 0, sz ); } + sz += rd; + if( sz < size && !at_eof && LZ_decompress_write_size( decoder ) > 0 ) + { + const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) ); + const int rd = readblock( ad.infd, ibuf, rsize ); + xLZ_decompress_write( decoder, ibuf, rd ); + if( rd < rsize ) + { + at_eof = true; LZ_decompress_finish( decoder ); + if( errno ) return err( -1, rdaerr_msg, errno, sz ); + } + } + } + return 0; + } + + +int Archive_reader::skip_member( const Extended & extended ) + { + if( extended.file_size() <= 0 ) return 0; + long long rest = round_up( extended.file_size() ); // size + padding + if( uncompressed_seekable && lseek( ad.infd, rest, SEEK_CUR ) > 0 ) return 0; + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + while( rest > 0 ) // skip tar member + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest; + const int ret = read( buf, rsize ); + if( ret != 0 ) return ret; + rest -= rsize; + } + return 0; + } + + +void Archive_reader_i::set_member( const long i ) + { + LZ_decompress_reset( decoder ); // prepare for new member + data_pos_ = ad.lzip_index.dblock( i ).pos(); + mdata_end_ = ad.lzip_index.dblock( i ).end(); + archive_pos = ad.lzip_index.mblock( i ).pos(); + member_id = i; + } + + +/* Read 'size' decompressed bytes from the archive. + Return value: 0 = OK, 1 = OOM or read error, 2 = EOF or invalid data. */ +int Archive_reader_i::read( uint8_t * const buf, const int size ) + { + int sz = 0; + + while( sz < size ) + { + const int rd = LZ_decompress_read( decoder, buf + sz, size - sz ); + if( rd < 0 ) + return err( 2, LZ_strerror( LZ_decompress_errno( decoder ) ), 0, sz ); + if( rd == 0 && LZ_decompress_finished( decoder ) == 1 ) + return err( -2, end_msg, 0, sz ); + sz += rd; data_pos_ += rd; + if( sz < size && LZ_decompress_write_size( decoder ) > 0 ) + { + const long long ibuf_size = 16384; + uint8_t ibuf[ibuf_size]; + const long long member_end = ad.lzip_index.mblock( member_id ).end(); + const long long rest = ( ( archive_pos < member_end ) ? + member_end : ad.lzip_index.cdata_size() ) - archive_pos; + const int rsize = std::min( LZ_decompress_write_size( decoder ), + (int)std::min( ibuf_size, rest ) ); + if( rsize <= 0 ) LZ_decompress_finish( decoder ); + else + { + const int rd = preadblock( ad.infd, ibuf, rsize, archive_pos ); + xLZ_decompress_write( decoder, ibuf, rd ); + archive_pos += rd; + if( rd < rsize ) + { + LZ_decompress_finish( decoder ); + if( errno ) return err( -1, rdaerr_msg, errno, sz ); + } + } + } + } + return 0; + } + + +int Archive_reader_i::skip_member( const Extended & extended ) + { + if( extended.file_size() <= 0 ) return 0; + long long rest = round_up( extended.file_size() ); // size + padding + if( data_pos_ + rest == mdata_end_ ) { data_pos_ = mdata_end_; return 0; } + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + while( rest > 0 ) // skip tar member + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest; + const int ret = read( buf, rsize ); + if( ret != 0 ) return ret; + rest -= rsize; + } + return 0; + } diff --git a/archive_reader.h b/archive_reader.h new file mode 100644 index 0000000..47fa844 --- /dev/null +++ b/archive_reader.h @@ -0,0 +1,120 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +struct Archive_descriptor + { + const std::string name; + const char * const namep; // printable archive name + const int infd; + const Lzip_index lzip_index; + const bool seekable; + const bool indexed; // archive is a compressed regular file + + Archive_descriptor( const std::string & archive_name ); + }; + + +class Archive_reader_base // base of serial and indexed readers + { +public: + const Archive_descriptor & ad; +protected: + LZ_Decoder * decoder; // destructor closes it if needed + const char * e_msg_; // message for show_file_error + int e_code_; // copy of errno + int e_size_; // partial size read in case of read error + bool e_skip_; // corrupt header skipped + bool fatal_; + + int err( const int retval, const char * const msg = "", const int code = 0, + const int size = 0, const bool skip = false ) + { e_msg_ = msg; e_code_ = code; e_size_ = size; e_skip_ = skip; + if( retval >= 0 ) return retval; + fatal_ = true; if( !*e_msg_ ) e_msg_ = "Fatal error"; return -retval; } + + Archive_reader_base( const Archive_descriptor & d ) + : ad( d ), decoder( 0 ), e_msg_( "" ), e_code_( 0 ), e_size_( 0 ), + e_skip_( false ), fatal_( false ) {} + +public: + virtual ~Archive_reader_base() + { if( decoder != 0 ) LZ_decompress_close( decoder ); } + + const char * e_msg() const { return e_msg_; } + int e_code() const { return e_code_; } + int e_size() const { return e_size_; } + bool e_skip() const { return e_skip_; } + bool fatal() const { return fatal_; } + + /* Read 'size' uncompressed bytes, decompressing the input if needed. + Return value: 0 = OK, 1 = OOM or read error, 2 = EOF or invalid data. + If !OK, fills all the e_* variables. */ + virtual int read( uint8_t * const buf, const int size ) = 0; + + int parse_records( Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const char * const default_msg, + const bool permissive ); + }; + + +class Archive_reader : public Archive_reader_base // serial reader + { + bool first_read; + bool uncompressed_seekable; // value set by first read call + bool at_eof; + +public: + Archive_reader( const Archive_descriptor & d ) + : Archive_reader_base( d ), first_read( true ), + uncompressed_seekable( false ), at_eof( false ) {} + + int read( uint8_t * const buf, const int size ); + int skip_member( const Extended & extended ); + }; + + +/* If the archive is compressed seekable (indexed), several indexed readers + can be constructed sharing the same Archive_descriptor, for example to + decode the archive in parallel. +*/ +class Archive_reader_i : public Archive_reader_base // indexed reader + { + long long data_pos_; // current decompressed position in archive + long long mdata_end_; // current member decompressed end + long long archive_pos; // current position in archive for pread + long member_id; // current member unless reading beyond + +public: + Archive_reader_i( const Archive_descriptor & d ) + : Archive_reader_base( d ), + data_pos_( 0 ), mdata_end_( 0 ), archive_pos( 0 ), member_id( 0 ) + { + decoder = LZ_decompress_open(); + if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ) + { LZ_decompress_close( decoder ); decoder = 0; fatal_ = true; } + } + + long long data_pos() const { return data_pos_; } + long long mdata_end() const { return mdata_end_; } + bool at_member_end() const { return data_pos_ == mdata_end_; } + + // Resets decoder and sets position to the start of the member. + void set_member( const long i ); + + int read( uint8_t * const buf, const int size ); + int skip_member( const Extended & extended ); + }; diff --git a/arg_parser.cc b/arg_parser.cc new file mode 100644 index 0000000..59998ac --- /dev/null +++ b/arg_parser.cc @@ -0,0 +1,197 @@ +/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) + Copyright (C) 2006-2022 Antonio Diaz Diaz. + + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +#include +#include +#include + +#include "arg_parser.h" + + +bool Arg_parser::parse_long_option( const char * const opt, const char * const arg, + const Option options[], int & argind ) + { + unsigned len; + int index = -1; + bool exact = false, ambig = false; + + for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ; + + // Test all long options for either exact match or abbreviated matches. + for( int i = 0; options[i].code != 0; ++i ) + if( options[i].long_name && + std::strncmp( options[i].long_name, &opt[2], len ) == 0 ) + { + if( std::strlen( options[i].long_name ) == len ) // Exact match found + { index = i; exact = true; break; } + else if( index < 0 ) index = i; // First nonexact match found + else if( options[index].code != options[i].code || + options[index].has_arg != options[i].has_arg ) + ambig = true; // Second or later nonexact match found + } + + if( ambig && !exact ) + { + error_ = "option '"; error_ += opt; error_ += "' is ambiguous"; + return false; + } + + if( index < 0 ) // nothing found + { + error_ = "unrecognized option '"; error_ += opt; error_ += '\''; + return false; + } + + ++argind; + data.push_back( Record( options[index].code, options[index].long_name ) ); + + if( opt[len+2] ) // '--=' syntax + { + if( options[index].has_arg == no ) + { + error_ = "option '--"; error_ += options[index].long_name; + error_ += "' doesn't allow an argument"; + return false; + } + if( options[index].has_arg == yes && !opt[len+3] ) + { + error_ = "option '--"; error_ += options[index].long_name; + error_ += "' requires an argument"; + return false; + } + data.back().argument = &opt[len+3]; + return true; + } + + if( options[index].has_arg == yes ) + { + if( !arg || !arg[0] ) + { + error_ = "option '--"; error_ += options[index].long_name; + error_ += "' requires an argument"; + return false; + } + ++argind; data.back().argument = arg; + return true; + } + + return true; + } + + +bool Arg_parser::parse_short_option( const char * const opt, const char * const arg, + const Option options[], int & argind ) + { + int cind = 1; // character index in opt + + while( cind > 0 ) + { + int index = -1; + const unsigned char c = opt[cind]; + + if( c != 0 ) + for( int i = 0; options[i].code; ++i ) + if( c == options[i].code ) + { index = i; break; } + + if( index < 0 ) + { + error_ = "invalid option -- '"; error_ += c; error_ += '\''; + return false; + } + + data.push_back( Record( c ) ); + if( opt[++cind] == 0 ) { ++argind; cind = 0; } // opt finished + + if( options[index].has_arg != no && cind > 0 && opt[cind] ) + { + data.back().argument = &opt[cind]; ++argind; cind = 0; + } + else if( options[index].has_arg == yes ) + { + if( !arg || !arg[0] ) + { + error_ = "option requires an argument -- '"; error_ += c; + error_ += '\''; + return false; + } + data.back().argument = arg; ++argind; cind = 0; + } + } + return true; + } + + +Arg_parser::Arg_parser( const int argc, const char * const argv[], + const Option options[], const bool in_order ) + { + if( argc < 2 || !argv || !options ) return; + + std::vector< const char * > non_options; // skipped non-options + int argind = 1; // index in argv + + while( argind < argc ) + { + const unsigned char ch1 = argv[argind][0]; + const unsigned char ch2 = ch1 ? argv[argind][1] : 0; + + if( ch1 == '-' && ch2 ) // we found an option + { + const char * const opt = argv[argind]; + const char * const arg = ( argind + 1 < argc ) ? argv[argind+1] : 0; + if( ch2 == '-' ) + { + if( !argv[argind][2] ) { ++argind; break; } // we found "--" + else if( !parse_long_option( opt, arg, options, argind ) ) break; + } + else if( !parse_short_option( opt, arg, options, argind ) ) break; + } + else + { + if( in_order ) data.push_back( Record( argv[argind++] ) ); + else non_options.push_back( argv[argind++] ); + } + } + if( !error_.empty() ) data.clear(); + else + { + for( unsigned i = 0; i < non_options.size(); ++i ) + data.push_back( Record( non_options[i] ) ); + while( argind < argc ) + data.push_back( Record( argv[argind++] ) ); + } + } + + +Arg_parser::Arg_parser( const char * const opt, const char * const arg, + const Option options[] ) + { + if( !opt || !opt[0] || !options ) return; + + if( opt[0] == '-' && opt[1] ) // we found an option + { + int argind = 1; // dummy + if( opt[1] == '-' ) + { if( opt[2] ) parse_long_option( opt, arg, options, argind ); } + else + parse_short_option( opt, arg, options, argind ); + if( !error_.empty() ) data.clear(); + } + else data.push_back( Record( opt ) ); + } diff --git a/arg_parser.h b/arg_parser.h new file mode 100644 index 0000000..e854838 --- /dev/null +++ b/arg_parser.h @@ -0,0 +1,110 @@ +/* Arg_parser - POSIX/GNU command line argument parser. (C++ version) + Copyright (C) 2006-2022 Antonio Diaz Diaz. + + This library is free software. Redistribution and use in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +*/ + +/* Arg_parser reads the arguments in 'argv' and creates a number of + option codes, option arguments, and non-option arguments. + + In case of error, 'error' returns a non-empty error message. + + 'options' is an array of 'struct Option' terminated by an element + containing a code which is zero. A null long_name means a short-only + option. A code value outside the unsigned char range means a long-only + option. + + Arg_parser normally makes it appear as if all the option arguments + were specified before all the non-option arguments for the purposes + of parsing, even if the user of your program intermixed option and + non-option arguments. If you want the arguments in the exact order + the user typed them, call 'Arg_parser' with 'in_order' = true. + + The argument '--' terminates all options; any following arguments are + treated as non-option arguments, even if they begin with a hyphen. + + The syntax for optional option arguments is '-' + (without whitespace), or '--='. +*/ + +class Arg_parser + { +public: + enum Has_arg { no, yes, maybe }; + + struct Option + { + int code; // Short option letter or code ( code != 0 ) + const char * long_name; // Long option name (maybe null) + Has_arg has_arg; + }; + +private: + struct Record + { + int code; + std::string parsed_name; + std::string argument; + explicit Record( const unsigned char c ) + : code( c ), parsed_name( "-" ) { parsed_name += c; } + Record( const int c, const char * const long_name ) + : code( c ), parsed_name( "--" ) { parsed_name += long_name; } + explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {} + }; + + const std::string empty_arg; + std::string error_; + std::vector< Record > data; + + bool parse_long_option( const char * const opt, const char * const arg, + const Option options[], int & argind ); + bool parse_short_option( const char * const opt, const char * const arg, + const Option options[], int & argind ); + +public: + Arg_parser( const int argc, const char * const argv[], + const Option options[], const bool in_order = false ); + + // Restricted constructor. Parses a single token and argument (if any). + Arg_parser( const char * const opt, const char * const arg, + const Option options[] ); + + const std::string & error() const { return error_; } + + // The number of arguments parsed. May be different from argc. + int arguments() const { return data.size(); } + + /* If code( i ) is 0, argument( i ) is a non-option. + Else argument( i ) is the option's argument (or empty). */ + int code( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].code; + else return 0; + } + + // Full name of the option parsed (short or long). + const std::string & parsed_name( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].parsed_name; + else return empty_arg; + } + + const std::string & argument( const int i ) const + { + if( i >= 0 && i < arguments() ) return data[i].argument; + else return empty_arg; + } + }; diff --git a/common.cc b/common.cc new file mode 100644 index 0000000..444280d --- /dev/null +++ b/common.cc @@ -0,0 +1,151 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include + +#include "tarlz.h" +#include "arg_parser.h" + + +void xinit_mutex( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_init( mutex, 0 ); + if( errcode ) + { show_error( "pthread_mutex_init", errcode ); exit_fail_mt(); } + } + +void xinit_cond( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_init( cond, 0 ); + if( errcode ) + { show_error( "pthread_cond_init", errcode ); exit_fail_mt(); } + } + + +void xdestroy_mutex( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_destroy( mutex ); + if( errcode ) + { show_error( "pthread_mutex_destroy", errcode ); exit_fail_mt(); } + } + +void xdestroy_cond( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_destroy( cond ); + if( errcode ) + { show_error( "pthread_cond_destroy", errcode ); exit_fail_mt(); } + } + + +void xlock( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_lock( mutex ); + if( errcode ) + { show_error( "pthread_mutex_lock", errcode ); exit_fail_mt(); } + } + + +void xunlock( pthread_mutex_t * const mutex ) + { + const int errcode = pthread_mutex_unlock( mutex ); + if( errcode ) + { show_error( "pthread_mutex_unlock", errcode ); exit_fail_mt(); } + } + + +void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex ) + { + const int errcode = pthread_cond_wait( cond, mutex ); + if( errcode ) + { show_error( "pthread_cond_wait", errcode ); exit_fail_mt(); } + } + + +void xsignal( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_signal( cond ); + if( errcode ) + { show_error( "pthread_cond_signal", errcode ); exit_fail_mt(); } + } + + +void xbroadcast( pthread_cond_t * const cond ) + { + const int errcode = pthread_cond_broadcast( cond ); + if( errcode ) + { show_error( "pthread_cond_broadcast", errcode ); exit_fail_mt(); } + } + + +unsigned long long parse_octal( const uint8_t * const ptr, const int size ) + { + unsigned long long result = 0; + int i = 0; + while( i < size && std::isspace( ptr[i] ) ) ++i; + for( ; i < size && ptr[i] >= '0' && ptr[i] <= '7'; ++i ) + { result <<= 3; result += ptr[i] - '0'; } + return result; + } + + +/* Return the number of bytes really read. + If (value returned < size) and (errno == 0), means EOF was reached. +*/ +int readblock( const int fd, uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = read( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n == 0 ) break; // EOF + else if( errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +/* Return the number of bytes really written. + If (value returned < size), it is always an error. +*/ +int writeblock( const int fd, const uint8_t * const buf, const int size ) + { + int sz = 0; + errno = 0; + while( sz < size ) + { + const int n = write( fd, buf + sz, size - sz ); + if( n > 0 ) sz += n; + else if( n < 0 && errno != EINTR ) break; + errno = 0; + } + return sz; + } + + +bool nonempty_arg( const Arg_parser & parser, const int i ) + { + return ( parser.code( i ) == 0 && !parser.argument( i ).empty() ); + } diff --git a/common_decode.cc b/common_decode.cc new file mode 100644 index 0000000..835687f --- /dev/null +++ b/common_decode.cc @@ -0,0 +1,243 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include + +#include "tarlz.h" +#include "arg_parser.h" + + +namespace { + +enum { mode_string_size = 10, + group_string_size = 1 + uname_l + 1 + gname_l + 1 }; // 67 + +void format_mode_string( const Tar_header header, char buf[mode_string_size] ) + { + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + + std::memcpy( buf, "----------", mode_string_size ); + switch( typeflag ) + { + case tf_regular: break; + case tf_link: buf[0] = 'h'; break; + case tf_symlink: buf[0] = 'l'; break; + case tf_chardev: buf[0] = 'c'; break; + case tf_blockdev: buf[0] = 'b'; break; + case tf_directory: buf[0] = 'd'; break; + case tf_fifo: buf[0] = 'p'; break; + case tf_hiperf: buf[0] = 'C'; break; + default: buf[0] = '?'; + } + const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits + const bool setuid = mode & S_ISUID; + const bool setgid = mode & S_ISGID; + const bool sticky = mode & S_ISVTX; + if( mode & S_IRUSR ) buf[1] = 'r'; + if( mode & S_IWUSR ) buf[2] = 'w'; + if( mode & S_IXUSR ) buf[3] = setuid ? 's' : 'x'; + else if( setuid ) buf[3] = 'S'; + if( mode & S_IRGRP ) buf[4] = 'r'; + if( mode & S_IWGRP ) buf[5] = 'w'; + if( mode & S_IXGRP ) buf[6] = setgid ? 's' : 'x'; + else if( setgid ) buf[6] = 'S'; + if( mode & S_IROTH ) buf[7] = 'r'; + if( mode & S_IWOTH ) buf[8] = 'w'; + if( mode & S_IXOTH ) buf[9] = sticky ? 't' : 'x'; + else if( sticky ) buf[9] = 'T'; + } + + +int format_user_group_string( const Extended & extended, + const Tar_header header, + char buf[group_string_size] ) + { + int len; + if( header[uname_o] && header[gname_o] ) + len = snprintf( buf, group_string_size, + " %.32s/%.32s", header + uname_o, header + gname_o ); + else + len = snprintf( buf, group_string_size, " %llu/%llu", + extended.get_uid(), extended.get_gid() ); + return len; + } + + +// return true if dir is a parent directory of name +bool compare_prefix_dir( const char * const dir, const char * const name ) + { + int len = 0; + while( dir[len] && dir[len] == name[len] ) ++len; + return ( !dir[len] && len > 0 && ( dir[len-1] == '/' || name[len] == '/' ) ); + } + + +// compare two file names ignoring trailing slashes +bool compare_tslash( const char * const name1, const char * const name2 ) + { + const char * p = name1; + const char * q = name2; + while( *p && *p == *q ) { ++p; ++q; } + while( *p == '/' ) ++p; + while( *q == '/' ) ++q; + return ( !*p && !*q ); + } + +} // end namespace + + +bool block_is_zero( const uint8_t * const buf, const int size ) + { + for( int i = 0; i < size; ++i ) if( buf[i] != 0 ) return false; + return true; + } + + +bool format_member_name( const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const bool long_format ) + { + if( long_format ) + { + format_mode_string( header, rbuf() ); + const int group_string_len = + format_user_group_string( extended, header, rbuf() + mode_string_size ); + int offset = mode_string_size + group_string_len; + const time_t mtime = extended.mtime().sec(); + struct tm t; + if( !localtime_r( &mtime, &t ) ) // if local time fails + { time_t z = 0; if( !gmtime_r( &z, &t ) ) // use the UTC epoch + { t.tm_year = 70; t.tm_mon = t.tm_hour = t.tm_min = 0; t.tm_mday = 1; } } + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); + const char * const link_string = !islink ? "" : + ( ( typeflag == tf_link ) ? " link to " : " -> " ); + // print "user/group size" in a field of width 19 with 8 or more for size + if( typeflag == tf_chardev || typeflag == tf_blockdev ) + { + const unsigned devmajor = parse_octal( header + devmajor_o, devmajor_l ); + const unsigned devminor = parse_octal( header + devminor_o, devminor_l ); + const int width = std::max( 1, + std::max( 8, 19 - group_string_len ) - 1 - decimal_digits( devminor ) ); + offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %*u,%u", + width, devmajor, devminor ); + } + else + { + const int width = std::max( 8, 19 - group_string_len ); + offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %*llu", + width, extended.file_size() ); + } + for( int i = 0; i < 2; ++i ) // resize rbuf if not large enough + { + const int len = snprintf( rbuf() + offset, rbuf.size() - offset, + " %4d-%02u-%02u %02u:%02u %s%s%s\n", + 1900 + t.tm_year, 1 + t.tm_mon, t.tm_mday, t.tm_hour, + t.tm_min, extended.path().c_str(), link_string, + islink ? extended.linkpath().c_str() : "" ); + if( len + offset < (int)rbuf.size() ) break; + if( !rbuf.resize( len + offset + 1 ) ) return false; + } + } + else + { + if( rbuf.size() < extended.path().size() + 2 && + !rbuf.resize( extended.path().size() + 2 ) ) return false; + snprintf( rbuf(), rbuf.size(), "%s\n", extended.path().c_str() ); + } + return true; + } + + +bool show_member_name( const Extended & extended, const Tar_header header, + const int vlevel, Resizable_buffer & rbuf ) + { + if( verbosity >= vlevel ) + { + if( !format_member_name( extended, header, rbuf, verbosity > vlevel ) ) + { show_error( mem_msg ); return false; } + std::fputs( rbuf(), stdout ); + std::fflush( stdout ); + } + return true; + } + + +bool check_skip_filename( const Cl_options & cl_opts, + std::vector< char > & name_pending, + const char * const filename ) + { + if( Exclude::excluded( filename ) ) return true; // skip excluded files + bool skip = cl_opts.num_files > 0; // if no files specified, skip nothing + if( skip ) // else skip all but the files (or trees) specified + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) ) + { + std::string removed_prefix; + const char * const name = remove_leading_dotslash( + cl_opts.parser.argument( i ).c_str(), &removed_prefix ); + if( compare_prefix_dir( name, filename ) || + compare_tslash( name, filename ) ) + { print_removed_prefix( removed_prefix ); + skip = false; name_pending[i] = false; break; } + } + return skip; + } + + +mode_t get_umask() + { + static mode_t mask = 0; // read once, cache the result + static bool first_call = true; + if( first_call ) { first_call = false; mask = umask( 0 ); umask( mask ); + mask &= S_IRWXU | S_IRWXG | S_IRWXO; } + return mask; + } + + +bool make_path( const std::string & name ) + { + const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; + unsigned end = name.size(); // first slash before last component + + while( end > 0 && name[end-1] == '/' ) --end; // remove trailing slashes + while( end > 0 && name[end-1] != '/' ) --end; // remove last component + while( end > 0 && name[end-1] == '/' ) --end; // remove more slashes + + unsigned index = 0; + while( index < end ) + { + while( index < end && name[index] == '/' ) ++index; + unsigned first = index; + while( index < end && name[index] != '/' ) ++index; + if( first < index ) + { + const std::string partial( name, 0, index ); + struct stat st; + if( lstat( partial.c_str(), &st ) == 0 ) + { if( !S_ISDIR( st.st_mode ) ) { errno = ENOTDIR; return false; } } + else if( mkdir( partial.c_str(), mode ) != 0 && errno != EEXIST ) + return false; // if EEXIST, another thread or process created the dir + } + } + return true; + } diff --git a/compress.cc b/compress.cc new file mode 100644 index 0000000..4e74efa --- /dev/null +++ b/compress.cc @@ -0,0 +1,375 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include // for lzlib.h +#include +#include +#include +#include + +#include "tarlz.h" +#include "arg_parser.h" + + +namespace { + +/* Variables used in signal handler context. + They are not declared volatile because the handler never returns. */ +std::string output_filename; +int outfd = -1; +bool delete_output_on_interrupt = false; + + +void set_signals( void (*action)(int) ) + { + std::signal( SIGHUP, action ); + std::signal( SIGINT, action ); + std::signal( SIGTERM, action ); + } + + +void cleanup_and_fail( const int retval ) + { + set_signals( SIG_IGN ); // ignore signals + if( delete_output_on_interrupt ) + { + delete_output_on_interrupt = false; + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n", + program_name, output_filename.c_str() ); + if( outfd >= 0 ) { close( outfd ); outfd = -1; } + if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT ) + show_error( "WARNING: deletion of output file (apparently) failed." ); + } + std::exit( retval ); + } + + +extern "C" void signal_handler( int ) + { + show_error( "Control-C or similar caught, quitting." ); + cleanup_and_fail( 1 ); + } + + +const char * ne_output_filename() // non-empty output file name + { + return output_filename.size() ? output_filename.c_str() : "(stdout)"; + } + + +bool check_tty_in( const char * const input_filename, const int infd ) + { + if( isatty( infd ) ) // for example /dev/tty + { show_file_error( input_filename, + "I won't read archive data from a terminal." ); + close( infd ); return false; } + return true; + } + +bool check_tty_out() + { + if( isatty( outfd ) ) // for example /dev/tty + { show_file_error( ne_output_filename(), + "I won't write compressed data to a terminal." ); + return false; } + return true; + } + + +// Set permissions, owner, and times. +void close_and_set_permissions( const struct stat * const in_statsp ) + { + bool warning = false; + if( in_statsp ) + { + const mode_t mode = in_statsp->st_mode; + // fchown will in many cases return with EPERM, which can be safely ignored. + if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 ) + { if( fchmod( outfd, mode ) != 0 ) warning = true; } + else + if( errno != EPERM || + fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 ) + warning = true; + } + if( close( outfd ) != 0 ) + { + show_error( "Error closing output file", errno ); + cleanup_and_fail( 1 ); + } + outfd = -1; + delete_output_on_interrupt = false; + if( in_statsp ) + { + struct utimbuf t; + t.actime = in_statsp->st_atime; + t.modtime = in_statsp->st_mtime; + if( utime( output_filename.c_str(), &t ) != 0 ) warning = true; + } + if( warning && verbosity >= 1 ) + show_error( "Can't change output file attributes." ); + } + + +bool archive_write( const uint8_t * const buf, const long long size, + LZ_Encoder * const encoder ) + { + static bool flushed = true; // avoid flushing empty lzip members + + if( size <= 0 && flushed ) return true; + flushed = ( size <= 0 ); + enum { obuf_size = 65536 }; + uint8_t obuf[obuf_size]; + long long sz = 0; + if( flushed ) LZ_compress_finish( encoder ); // flush encoder + while( sz < size || flushed ) + { + if( sz < size ) + { const int wr = LZ_compress_write( encoder, buf + sz, + std::min( size - sz, (long long)max_dictionary_size ) ); + if( wr < 0 ) internal_error( "library error (LZ_compress_write)." ); + sz += wr; } + if( sz >= size && !flushed ) break; // minimize dictionary size + const int rd = LZ_compress_read( encoder, obuf, obuf_size ); + if( rd < 0 ) internal_error( "library error (LZ_compress_read)." ); + if( rd == 0 && sz >= size ) break; + if( writeblock( outfd, obuf, rd ) != rd ) + { show_file_error( ne_output_filename(), werr_msg, errno ); return false; } + } + if( LZ_compress_finished( encoder ) == 1 && + LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 ) + internal_error( "library error (LZ_compress_restart_member)." ); + return true; + } + + +bool tail_compress( const Cl_options & cl_opts, + const int infd, Tar_header header, + LZ_Encoder * const encoder ) + { + if( cl_opts.solidity != solid && !archive_write( 0, 0, encoder ) ) + return false; // flush encoder before compressing EOA blocks + int size = header_size; + bool zero = true; // true until non-zero data found after EOA blocks + while( true ) + { + if( size > 0 && !archive_write( header, size, encoder ) ) + { close( infd ); return false; } + if( size < header_size ) break; // EOF + size = readblock( infd, header, header_size ); + if( errno ) return false; + if( zero && !block_is_zero( header, size ) ) + { zero = false; // flush encoder after compressing EOA blocks + if( cl_opts.solidity != solid && !archive_write( 0, 0, encoder ) ) + return false; } + } + return true; + } + + +int compress_archive( const Cl_options & cl_opts, + const std::string & input_filename, + LZ_Encoder * const encoder, + const bool to_stdout, const bool to_file ) + { + const bool one_to_one = !to_stdout && !to_file; + const bool from_stdin = input_filename == "-"; + const char * const filename = from_stdin ? "(stdin)" : input_filename.c_str(); + const int infd = from_stdin ? STDIN_FILENO : open_instream( filename ); + if( infd < 0 || !check_tty_in( filename, infd ) ) return 1; + if( one_to_one ) + { + if( from_stdin ) { outfd = STDOUT_FILENO; output_filename.clear(); } + else + { + output_filename = input_filename + ".lz"; + outfd = open_outstream( output_filename, true, 0, false ); + if( outfd < 0 ) { close( infd ); return 1; } + delete_output_on_interrupt = true; + } + if( !check_tty_out() ) { close( infd ); return 1; } // don't delete a tty + } + if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename ); + + unsigned long long partial_data_size = 0; // size of current block + Extended extended; // metadata from extended records + Resizable_buffer rbuf; // headers and extended records buffer + if( !rbuf.size() ) { show_error( mem_msg ); return 1; } + const char * const rderr_msg = "Read error"; + + while( true ) // process one tar member per iteration + { + int total_header_size = header_size; // size of header(s) read + const int rd = readblock( infd, rbuf.u8(), header_size ); + if( rd == 0 && errno == 0 ) break; // missing EOA blocks + if( rd != header_size ) + { show_file_error( filename, rderr_msg, errno ); close( infd ); return 1; } + + if( to_file && outfd < 0 ) // open outfd after verifying infd + { + outfd = open_outstream( output_filename, true, 0, false ); + // check tty only once and don't try to delete a tty + if( outfd < 0 || !check_tty_out() ) { close( infd ); return 1; } + delete_output_on_interrupt = true; + } + + if( !verify_ustar_chksum( rbuf.u8() ) ) // maybe EOA block + { + if( block_is_zero( rbuf.u8(), header_size ) ) // first EOA block + { tail_compress( cl_opts, infd, rbuf.u8(), encoder ); break; } + show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; + } + + const Typeflag typeflag = (Typeflag)rbuf()[typeflag_o]; + if( typeflag == tf_extended || typeflag == tf_global ) + { + const long long edsize = parse_octal( rbuf.u8() + size_o, size_l ); + const long long bufsize = round_up( edsize ); + // overflow or no extended data + if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) + { show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; } + if( !rbuf.resize( total_header_size + bufsize ) ) + { show_file_error( filename, mem_msg ); close( infd ); return 1; } + if( readblock( infd, rbuf.u8() + total_header_size, bufsize ) != bufsize ) + { show_file_error( filename, rderr_msg, errno ); close( infd ); return 1; } + total_header_size += bufsize; + if( typeflag == tf_extended ) // do not parse global headers + { + if( !extended.parse( rbuf() + header_size, edsize, false ) ) + { show_file_error( filename, extrec_msg ); close( infd ); return 2; } + // read ustar header + if( !rbuf.resize( total_header_size + header_size ) ) + { show_file_error( filename, mem_msg ); close( infd ); return 1; } + if( readblock( infd, rbuf.u8() + total_header_size, header_size ) != header_size ) + { show_file_error( filename, errno ? rderr_msg : end_msg, errno ); + close( infd ); return errno ? 1 : 2; } + if( !verify_ustar_chksum( rbuf.u8() ) ) + { show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; } + const Typeflag typeflag2 = (Typeflag)(rbuf() + total_header_size)[typeflag_o]; + if( typeflag2 == tf_extended || typeflag2 == tf_global ) + { const char * msg = ( typeflag2 == tf_global ) ? fv_msg2 : fv_msg3; + show_file_error( filename, msg ); close( infd ); return 2; } + total_header_size += header_size; + } + } + + const long long file_size = round_up( extended.get_file_size_and_reset( + rbuf.u8() + total_header_size - header_size ) ); + if( cl_opts.solidity == bsolid && + block_is_full( total_header_size - header_size, file_size, + cl_opts.data_size, partial_data_size ) && + !archive_write( 0, 0, encoder ) ) { close( infd ); return 1; } + if( !archive_write( rbuf.u8(), total_header_size, encoder ) ) + { close( infd ); return 1; } + + if( file_size ) + { + const long long bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = file_size; // file_size already rounded up + while( rest > 0 ) + { + int size = std::min( rest, bufsize ); + const int rd = readblock( infd, buf, size ); + rest -= rd; + if( rd != size ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "'%s' ends unexpectedly at pos %llu\n", + filename, file_size - rest ); + close( infd ); return 1; + } + if( !archive_write( buf, size, encoder ) ) { close( infd ); return 1; } + } + } + if( cl_opts.solidity == no_solid && !archive_write( 0, 0, encoder ) ) + { close( infd ); return 1; } // one tar member per lzip member + } + // flush and restart encoder (for next archive) + if( !archive_write( 0, 0, encoder ) ) { close( infd ); return 1; } + const bool need_close = delete_output_on_interrupt && + ( one_to_one || ( to_file && !from_stdin ) ); + struct stat in_stats; + const struct stat * const in_statsp = + ( need_close && fstat( infd, &in_stats ) == 0 ) ? &in_stats : 0; + if( close( infd ) != 0 ) + { show_file_error( filename, eclosf_msg, errno ); return 1; } + if( need_close ) close_and_set_permissions( in_statsp ); + return 0; + } + +} // end namespace + + +int compress( const Cl_options & cl_opts ) + { + if( cl_opts.num_files > 1 && cl_opts.output_filename.size() ) + { show_file_error( cl_opts.output_filename.c_str(), + "Only can compress one archive when using '-o'." ); return 1; } + const bool to_stdout = cl_opts.output_filename == "-"; + if( to_stdout ) // check tty only once + { outfd = STDOUT_FILENO; if( !check_tty_out() ) return 1; } + else outfd = -1; + const bool to_file = !to_stdout && cl_opts.output_filename.size(); + if( to_file ) output_filename = cl_opts.output_filename; + if( !to_stdout && ( cl_opts.filenames_given || to_file ) ) + set_signals( signal_handler ); + + LZ_Encoder * encoder = LZ_compress_open( + option_mapping[cl_opts.level].dictionary_size, + option_mapping[cl_opts.level].match_len_limit, LLONG_MAX ); + if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) + { + if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) + show_error( mem_msg2 ); + else + internal_error( "invalid argument to encoder." ); + return 1; + } + + if( !cl_opts.filenames_given ) + return compress_archive( cl_opts, "-", encoder, to_stdout, to_file ); + int retval = 0; + bool stdin_used = false; + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) ) // skip opts, empty names + { + if( cl_opts.parser.argument( i ) == "-" ) + { if( stdin_used ) continue; else stdin_used = true; } + const int tmp = compress_archive( cl_opts, cl_opts.parser.argument( i ), + encoder, to_stdout, to_file ); + if( tmp ) + { set_retval( retval, tmp ); + if( delete_output_on_interrupt ) cleanup_and_fail( retval ); } + } + // flush and close encoder if needed + if( outfd >= 0 && archive_write( 0, 0, encoder ) && + LZ_compress_close( encoder ) < 0 ) + { show_error( "LZ_compress_close failed." ); set_retval( retval, 1 ); } + if( outfd >= 0 && close( outfd ) != 0 ) // to_stdout + { + show_error( "Error closing stdout", errno ); + set_retval( retval, 1 ); + } + return retval; + } diff --git a/configure b/configure new file mode 100755 index 0000000..e02fcf0 --- /dev/null +++ b/configure @@ -0,0 +1,200 @@ +#! /bin/sh +# configure script for Tarlz - Archiver with multimember lzip compression +# Copyright (C) 2013-2022 Antonio Diaz Diaz. +# +# This configure script is free software: you have unlimited permission +# to copy, distribute, and modify it. + +pkgname=tarlz +pkgversion=0.23 +progname=tarlz +srctrigger=doc/${pkgname}.texi + +# clear some things potentially inherited from environment. +LC_ALL=C +export LC_ALL +srcdir= +prefix=/usr/local +exec_prefix='$(prefix)' +bindir='$(exec_prefix)/bin' +datarootdir='$(prefix)/share' +infodir='$(datarootdir)/info' +mandir='$(datarootdir)/man' +CXX=g++ +CPPFLAGS= +CXXFLAGS='-Wall -W -O2' +LDFLAGS= +LIBS='-llz -lpthread' + +# checking whether we are using GNU C++. +/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; } + +# Loop over all args +args= +no_create= +while [ $# != 0 ] ; do + + # Get the first arg, and shuffle + option=$1 ; arg2=no + shift + + # Add the argument quoted to args + if [ -z "${args}" ] ; then args="\"${option}\"" + else args="${args} \"${option}\"" ; fi + + # Split out the argument for options that take them + case ${option} in + *=*) optarg=`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'` ;; + esac + + # Process the options + case ${option} in + --help | -h) + echo "Usage: $0 [OPTION]... [VAR=VALUE]..." + echo + echo "To assign makefile variables (e.g., CXX, CXXFLAGS...), specify them as" + echo "arguments to configure in the form VAR=VALUE." + echo + echo "Options and variables: [defaults in brackets]" + echo " -h, --help display this help and exit" + echo " -V, --version output version information and exit" + echo " --srcdir=DIR find the sources in DIR [. or ..]" + echo " --prefix=DIR install into DIR [${prefix}]" + echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]" + echo " --bindir=DIR user executables directory [${bindir}]" + echo " --datarootdir=DIR base directory for doc and data [${datarootdir}]" + echo " --infodir=DIR info files directory [${infodir}]" + echo " --mandir=DIR man pages directory [${mandir}]" + echo " CXX=COMPILER C++ compiler to use [${CXX}]" + echo " CPPFLAGS=OPTIONS command line options for the preprocessor [${CPPFLAGS}]" + echo " CXXFLAGS=OPTIONS command line options for the C++ compiler [${CXXFLAGS}]" + echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS" + echo " LDFLAGS=OPTIONS command line options for the linker [${LDFLAGS}]" + echo " LIBS=OPTIONS libraries to pass to the linker [${LIBS}]" + echo + exit 0 ;; + --version | -V) + echo "Configure script for ${pkgname} version ${pkgversion}" + exit 0 ;; + --srcdir) srcdir=$1 ; arg2=yes ;; + --prefix) prefix=$1 ; arg2=yes ;; + --exec-prefix) exec_prefix=$1 ; arg2=yes ;; + --bindir) bindir=$1 ; arg2=yes ;; + --datarootdir) datarootdir=$1 ; arg2=yes ;; + --infodir) infodir=$1 ; arg2=yes ;; + --mandir) mandir=$1 ; arg2=yes ;; + + --srcdir=*) srcdir=${optarg} ;; + --prefix=*) prefix=${optarg} ;; + --exec-prefix=*) exec_prefix=${optarg} ;; + --bindir=*) bindir=${optarg} ;; + --datarootdir=*) datarootdir=${optarg} ;; + --infodir=*) infodir=${optarg} ;; + --mandir=*) mandir=${optarg} ;; + --no-create) no_create=yes ;; + + CXX=*) CXX=${optarg} ;; + CPPFLAGS=*) CPPFLAGS=${optarg} ;; + CXXFLAGS=*) CXXFLAGS=${optarg} ;; + CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;; + LDFLAGS=*) LDFLAGS=${optarg} ;; + LIBS=*) LIBS="${optarg} ${LIBS}" ;; + + --*) + echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;; + *=* | *-*-*) ;; + *) + echo "configure: unrecognized option: '${option}'" 1>&2 + echo "Try 'configure --help' for more information." 1>&2 + exit 1 ;; + esac + + # Check if the option took a separate argument + if [ "${arg2}" = yes ] ; then + if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift + else echo "configure: Missing argument to '${option}'" 1>&2 + exit 1 + fi + fi +done + +# Find the source files, if location was not specified. +srcdirtext= +if [ -z "${srcdir}" ] ; then + srcdirtext="or . or .." ; srcdir=. + if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi + if [ ! -r "${srcdir}/${srctrigger}" ] ; then + ## the sed command below emulates the dirname command + srcdir=`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + fi +fi + +if [ ! -r "${srcdir}/${srctrigger}" ] ; then + echo "configure: Can't find sources in ${srcdir} ${srcdirtext}" 1>&2 + echo "configure: (At least ${srctrigger} is missing)." 1>&2 + exit 1 +fi + +# Set srcdir to . if that's what it is. +if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi + +echo +if [ -z "${no_create}" ] ; then + echo "creating config.status" + rm -f config.status + cat > config.status << EOF +#! /bin/sh +# This file was generated automatically by configure. Don't edit. +# Run this file to recreate the current configuration. +# +# This script is free software: you have unlimited permission +# to copy, distribute, and modify it. + +exec /bin/sh $0 ${args} --no-create +EOF + chmod +x config.status +fi + +echo "creating Makefile" +echo "VPATH = ${srcdir}" +echo "prefix = ${prefix}" +echo "exec_prefix = ${exec_prefix}" +echo "bindir = ${bindir}" +echo "datarootdir = ${datarootdir}" +echo "infodir = ${infodir}" +echo "mandir = ${mandir}" +echo "CXX = ${CXX}" +echo "CPPFLAGS = ${CPPFLAGS}" +echo "CXXFLAGS = ${CXXFLAGS}" +echo "LDFLAGS = ${LDFLAGS}" +echo "LIBS = ${LIBS}" +rm -f Makefile +cat > Makefile << EOF +# Makefile for Tarlz - Archiver with multimember lzip compression +# Copyright (C) 2013-2022 Antonio Diaz Diaz. +# This file was generated automatically by configure. Don't edit. +# +# This Makefile is free software: you have unlimited permission +# to copy, distribute, and modify it. + +pkgname = ${pkgname} +pkgversion = ${pkgversion} +progname = ${progname} +VPATH = ${srcdir} +prefix = ${prefix} +exec_prefix = ${exec_prefix} +bindir = ${bindir} +datarootdir = ${datarootdir} +infodir = ${infodir} +mandir = ${mandir} +CXX = ${CXX} +CPPFLAGS = ${CPPFLAGS} +CXXFLAGS = ${CXXFLAGS} +LDFLAGS = ${LDFLAGS} +LIBS = ${LIBS} +EOF +cat "${srcdir}/Makefile.in" >> Makefile + +echo "OK. Now you can run make." +echo "If make fails, verify that the compression library lzlib is correctly" +echo "installed (see INSTALL)." diff --git a/create.cc b/create.cc new file mode 100644 index 0000000..53ba5f5 --- /dev/null +++ b/create.cc @@ -0,0 +1,788 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include // for lzlib.h +#include +#include +#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ + !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ +#include // for major, minor +#endif +#include +#include +#include +#include + +#include "tarlz.h" +#include "arg_parser.h" +#include "create.h" + + +Archive_attrs archive_attrs; // archive attributes at time of creation + + +namespace { + +const Cl_options * gcl_opts = 0; // local vars needed by add_member +LZ_Encoder * encoder = 0; +const char * archive_namep = 0; +unsigned long long partial_data_size = 0; // size of current block +Resizable_buffer grbuf; // extended header + data +int goutfd = -1; +int error_status = 0; + + +bool option_C_after_relative_filename( const Arg_parser & parser ) + { + for( int i = 0; i < parser.arguments(); ++i ) + if( nonempty_arg( parser, i ) && parser.argument( i )[0] != '/' ) + while( ++i < parser.arguments() ) + if( parser.code( i ) == 'C' ) return true; + return false; + } + + +/* Check archive type. Return position of EOA blocks or -1 if failure. + If remove_eoa, leave fd file pos at beginning of the EOA blocks. + Else, leave fd file pos at 0. +*/ +long long check_compressed_appendable( const int fd, const bool remove_eoa ) + { + struct stat st; // fd must be regular + if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return -1; + if( lseek( fd, 0, SEEK_SET ) != 0 ) return -1; + enum { bufsize = header_size + ( header_size / 8 ) }; + uint8_t buf[bufsize]; + const int rd = readblock( fd, buf, bufsize ); + if( rd == 0 && errno == 0 ) return 0; // append to empty archive + if( rd < min_member_size || ( rd != bufsize && errno ) ) return -1; + const Lzip_header * const p = (const Lzip_header *)buf; // shut up gcc + if( !p->verify_magic() || !p->verify_version() ) return -1; + LZ_Decoder * decoder = LZ_decompress_open(); // decompress first header + if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok || + LZ_decompress_write( decoder, buf, rd ) != rd || + LZ_decompress_read( decoder, buf, header_size ) != header_size ) + { LZ_decompress_close( decoder ); return -1; } + LZ_decompress_close( decoder ); + const bool maybe_eoa = block_is_zero( buf, header_size ); + if( !verify_ustar_chksum( buf ) && !maybe_eoa ) return -1; + const long long end = lseek( fd, 0, SEEK_END ); + if( end < min_member_size ) return -1; + + Lzip_trailer trailer; // read last trailer + if( seek_read( fd, trailer.data, Lzip_trailer::size, + end - Lzip_trailer::size ) != Lzip_trailer::size ) return -1; + const long long member_size = trailer.member_size(); + if( member_size < min_member_size || member_size > end || + ( maybe_eoa && member_size != end ) ) return -1; // garbage after EOA? + + Lzip_header header; // read last header + if( seek_read( fd, header.data, Lzip_header::size, + end - member_size ) != Lzip_header::size ) return -1; + if( !header.verify_magic() || !header.verify_version() || + !isvalid_ds( header.dictionary_size() ) ) return -1; + + // EOA marker in last member must contain between 512 and 32256 zeros alone + const unsigned long long data_size = trailer.data_size(); + if( data_size < header_size || data_size > 32256 ) return -1; + const unsigned data_crc = trailer.data_crc(); + const CRC32 crc32; + uint32_t crc = 0xFFFFFFFFU; + for( unsigned i = 0; i < data_size; ++i ) crc32.update_byte( crc, 0 ); + crc ^= 0xFFFFFFFFU; + if( crc != data_crc ) return -1; + + const long long pos = remove_eoa ? end - member_size : 0; + if( lseek( fd, pos, SEEK_SET ) != pos ) return -1; + return end - member_size; + } + + +/* Skip all tar headers. + Return position of EOA blocks, -1 if failure, -2 if out of memory. + If remove_eoa, leave fd file pos at beginning of the EOA blocks. + Else, leave fd file pos at 0. +*/ +long long check_uncompressed_appendable( const int fd, const bool remove_eoa ) + { + struct stat st; // fd must be regular + if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return -1; + if( lseek( fd, 0, SEEK_SET ) != 0 ) return -1; + if( st.st_size <= 0 ) return 0; // append to empty archive + long long eoa_pos = 0; // pos of EOA blocks + Extended extended; // metadata from extended records + Resizable_buffer rbuf; // extended records buffer + bool prev_extended = false; // prev header was extended + if( !rbuf.size() ) return -2; + + while( true ) // process one tar header per iteration + { + Tar_header header; + const int rd = readblock( fd, header, header_size ); + if( rd == 0 && errno == 0 ) break; // missing EOA blocks + if( rd != header_size ) return -1; + if( !verify_ustar_chksum( header ) ) // maybe EOA block + { if( block_is_zero( header, header_size ) ) break; else return -1; } + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_extended || typeflag == tf_global ) + { + if( prev_extended ) return -1; + const long long edsize = parse_octal( header + size_o, size_l ); + const long long bufsize = round_up( edsize ); + if( edsize <= 0 || edsize >= 1LL << 33 || bufsize >= INT_MAX ) + return -1; // overflow or no extended data + if( !rbuf.resize( bufsize ) ) return -2; + if( readblock( fd, rbuf.u8(), bufsize ) != bufsize ) + return -1; + if( typeflag == tf_extended ) + { if( !extended.parse( rbuf(), edsize, false ) ) return -1; + prev_extended = true; } + continue; + } + prev_extended = false; + + eoa_pos = lseek( fd, round_up( extended.get_file_size_and_reset( header ) ), + SEEK_CUR ); + if( eoa_pos <= 0 ) return -1; + } + + if( prev_extended ) return -1; + const long long pos = remove_eoa ? eoa_pos : 0; + if( lseek( fd, pos, SEEK_SET ) != pos ) return -1; + return eoa_pos; + } + + +bool archive_write( const uint8_t * const buf, const int size ) + { + static bool flushed = true; // avoid flushing empty lzip members + + if( size <= 0 && flushed ) return true; + flushed = ( size <= 0 ); + if( !encoder ) // uncompressed + return writeblock_wrapper( goutfd, buf, size ); + enum { obuf_size = 65536 }; + uint8_t obuf[obuf_size]; + int sz = 0; + if( size <= 0 ) LZ_compress_finish( encoder ); // flush encoder + while( sz < size || size <= 0 ) + { + const int wr = LZ_compress_write( encoder, buf + sz, size - sz ); + if( wr < 0 ) internal_error( "library error (LZ_compress_write)." ); + sz += wr; + if( sz >= size && size > 0 ) break; // minimize dictionary size + const int rd = LZ_compress_read( encoder, obuf, obuf_size ); + if( rd < 0 ) internal_error( "library error (LZ_compress_read)." ); + if( rd == 0 && sz >= size ) break; + if( !writeblock_wrapper( goutfd, obuf, rd ) ) return false; + } + if( LZ_compress_finished( encoder ) == 1 && + LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 ) + internal_error( "library error (LZ_compress_restart_member)." ); + return true; + } + + +bool write_extended( const Extended & extended ) + { + const long long ebsize = extended.format_block( grbuf ); // may be 0 + if( ebsize < 0 ) + { show_error( ( ebsize == -2 ) ? mem_msg2 : eferec_msg ); return false; } + for( long long pos = 0; pos < ebsize; ) // write extended block to archive + { + int size = std::min( ebsize - pos, 1LL << 20 ); + if( !archive_write( grbuf.u8() + pos, size ) ) return false; + pos += size; + } + return true; + } + + +// Return true if it stores filename in the ustar header. +bool store_name( const char * const filename, Extended & extended, + Tar_header header, const bool force_extended_name ) + { + const char * const stored_name = + remove_leading_dotslash( filename, &extended.removed_prefix, true ); + + if( !force_extended_name ) // try storing filename in the ustar header + { + const int len = std::strlen( stored_name ); + enum { max_len = prefix_l + 1 + name_l }; // prefix + '/' + name + if( len <= name_l ) // stored_name fits in name + { std::memcpy( header + name_o, stored_name, len ); return true; } + if( len <= max_len ) // find shortest prefix + for( int i = len - name_l - 1; i < len && i <= prefix_l; ++i ) + if( stored_name[i] == '/' ) // stored_name can be split + { + std::memcpy( header + name_o, stored_name + i + 1, len - i - 1 ); + std::memcpy( header + prefix_o, stored_name, i ); + return true; + } + } + // store filename in extended record, leave name zeroed in ustar header + extended.path( stored_name ); + return false; + } + + +// add one tar member to the archive +int add_member( const char * const filename, const struct stat *, + const int flag, struct FTW * ) + { + if( Exclude::excluded( filename ) ) return 0; // skip excluded files + long long file_size; + Extended extended; // metadata for extended records + Tar_header header; + if( !fill_headers( filename, extended, header, file_size, flag ) ) return 0; + print_removed_prefix( extended.removed_prefix ); + const int infd = file_size ? open_instream( filename ) : -1; + if( file_size && infd < 0 ) { set_error_status( 1 ); return 0; } + + if( encoder && gcl_opts->solidity == bsolid && + block_is_full( extended.full_size(), file_size, gcl_opts->data_size, + partial_data_size ) && !archive_write( 0, 0 ) ) return 1; + + if( !write_extended( extended ) || !archive_write( header, header_size ) ) + return 1; + if( file_size ) + { + const long long bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = file_size; + while( rest > 0 ) + { + int size = std::min( rest, bufsize ); + const int rd = readblock( infd, buf, size ); + rest -= rd; + if( rd != size ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n", + filename, file_size - rest ); + close( infd ); return 1; + } + if( rest == 0 ) // last read + { + const int rem = file_size % header_size; + if( rem > 0 ) + { const int padding = header_size - rem; + std::memset( buf + size, 0, padding ); size += padding; } + } + if( !archive_write( buf, size ) ) { close( infd ); return 1; } + } + if( close( infd ) != 0 ) + { show_file_error( filename, eclosf_msg, errno ); return 1; } + } + if( encoder && gcl_opts->solidity == no_solid && !archive_write( 0, 0 ) ) + return 1; + if( gcl_opts->warn_newer && archive_attrs.is_newer( filename ) ) + { show_file_error( filename, "File is newer than the archive." ); + set_error_status( 1 ); } + if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename ); + return 0; + } + + +bool check_tty_out( const char * const archive_namep, const int outfd, + const bool to_stdout ) + { + if( isatty( outfd ) ) // for example /dev/tty + { show_file_error( archive_namep, to_stdout ? + "I won't write archive data to a terminal (missing -f option?)" : + "I won't write archive data to a terminal." ); + return false; } + return true; + } + +} // end namespace + + +// infd and outfd can refer to the same file if copying to a lower file +// position or if source and destination blocks don't overlap. +// max_size < 0 means no size limit. +bool copy_file( const int infd, const int outfd, const long long max_size ) + { + const long long buffer_size = 65536; + // remaining number of bytes to copy + long long rest = ( ( max_size >= 0 ) ? max_size : buffer_size ); + long long copied_size = 0; + uint8_t * const buffer = new uint8_t[buffer_size]; + bool error = false; + + while( rest > 0 ) + { + const int size = std::min( buffer_size, rest ); + if( max_size >= 0 ) rest -= size; + const int rd = readblock( infd, buffer, size ); + if( rd != size && errno ) + { show_error( "Error reading input file", errno ); error = true; break; } + if( rd > 0 ) + { + if( !writeblock_wrapper( outfd, buffer, rd ) ) { error = true; break; } + copied_size += rd; + } + if( rd < size ) break; // EOF + } + delete[] buffer; + return ( !error && ( max_size < 0 || copied_size == max_size ) ); + } + + +bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, + const int size ) + { + if( writeblock( outfd, buffer, size ) != size ) + { show_file_error( archive_namep, werr_msg, errno ); return false; } + return true; + } + + +// write End-Of-Archive records +bool write_eoa_records( const int outfd, const bool compressed ) + { + if( compressed ) + { + enum { eoa_member_size = 44 }; + const uint8_t eoa_member[eoa_member_size] = { + 0x4C, 0x5A, 0x49, 0x50, 0x01, 0x0C, 0x00, 0x00, 0x6F, 0xFD, 0xFF, 0xFF, + 0xA3, 0xB7, 0x80, 0x0C, 0x82, 0xDB, 0xFF, 0xFF, 0x9F, 0xF0, 0x00, 0x00, + 0x2E, 0xAF, 0xB5, 0xEF, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return writeblock_wrapper( outfd, eoa_member, eoa_member_size ); + } + enum { bufsize = 2 * header_size }; + uint8_t buf[bufsize]; + std::memset( buf, 0, bufsize ); + return writeblock_wrapper( outfd, buf, bufsize ); + } + + +/* Remove any amount of leading "./" and '/' strings from filename. + Optionally also remove prefixes containing a ".." component. + Return the removed prefix in *removed_prefixp. +*/ +const char * remove_leading_dotslash( const char * const filename, + std::string * const removed_prefixp, + const bool dotdot ) + { + const char * p = filename; + + if( dotdot ) + for( int i = 0; filename[i]; ++i ) + if( dotdot_at_i( filename, i ) ) p = filename + i + 2; + while( *p == '/' || ( *p == '.' && p[1] == '/' ) ) ++p; + if( p != filename ) removed_prefixp->assign( filename, p - filename ); + else removed_prefixp->clear(); // no prefix was removed + if( *p == 0 && *filename != 0 ) p = "."; + return p; + } + + +/* If msgp is null, print the message, else return the message in *msgp. + If prefix is already in the list, print nothing or return empty *msgp. + Return true if a message is printed or returned in *msgp. */ +bool print_removed_prefix( const std::string & prefix, + std::string * const msgp ) + { + // prevent two threads from modifying the list of prefixes at the same time + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + static std::vector< std::string > prefixes; // list of prefixes + + if( verbosity < 0 || prefix.empty() ) + { if( msgp ) msgp->clear(); return false; } + xlock( &mutex ); + for( unsigned i = 0; i < prefixes.size(); ++i ) + if( prefixes[i] == prefix ) + { xunlock( &mutex ); if( msgp ) msgp->clear(); return false; } + prefixes.push_back( prefix ); + std::string msg( "Removing leading '" ); msg += prefix; + msg += "' from member names."; + if( msgp ) *msgp = msg; else show_error( msg.c_str() ); + xunlock( &mutex ); // put here to prevent mixing calls to show_error + return true; + } + + +// set file_size != 0 only for regular files +bool fill_headers( const char * const filename, Extended & extended, + Tar_header header, long long & file_size, const int flag ) + { + struct stat st; + if( hstat( filename, &st, gcl_opts->dereference ) != 0 ) + { show_file_error( filename, cant_stat, errno ); + set_error_status( 1 ); return false; } + if( archive_attrs.is_the_archive( st ) ) + { show_file_error( archive_namep, "Archive can't contain itself; not dumped." ); + return false; } + init_tar_header( header ); + bool force_extended_name = false; + + const mode_t mode = st.st_mode; + print_octal( header + mode_o, mode_l - 1, + mode & ( S_ISUID | S_ISGID | S_ISVTX | + S_IRWXU | S_IRWXG | S_IRWXO ) ); + const long long uid = ( gcl_opts->uid >= 0 ) ? gcl_opts->uid : st.st_uid; + const long long gid = ( gcl_opts->gid >= 0 ) ? gcl_opts->gid : st.st_gid; + if( uid_in_ustar_range( uid ) ) print_octal( header + uid_o, uid_l - 1, uid ); + else if( extended.set_uid( uid ) ) force_extended_name = true; + if( uid_in_ustar_range( gid ) ) print_octal( header + gid_o, gid_l - 1, gid ); + else if( extended.set_gid( gid ) ) force_extended_name = true; + const long long mtime = gcl_opts->mtime_set ? gcl_opts->mtime : st.st_mtime; + if( time_in_ustar_range( mtime ) ) + print_octal( header + mtime_o, mtime_l - 1, mtime ); + else { extended.set_atime( gcl_opts->mtime_set ? mtime : st.st_atime ); + extended.set_mtime( mtime ); force_extended_name = true; } + Typeflag typeflag; + if( S_ISREG( mode ) ) typeflag = tf_regular; + else if( S_ISDIR( mode ) ) + { + typeflag = tf_directory; + if( flag == FTW_DNR ) + { show_file_error( filename, "Can't open directory", errno ); + set_error_status( 1 ); return false; } + } + else if( S_ISLNK( mode ) ) + { + typeflag = tf_symlink; + long len, sz; + if( st.st_size <= linkname_l ) + { + len = sz = readlink( filename, (char *)header + linkname_o, linkname_l ); + while( len > 1 && header[linkname_o+len-1] == '/' ) // trailing '/' + { --len; header[linkname_o+len] = 0; } + } + else + { + char * const buf = new char[st.st_size+1]; + len = sz = readlink( filename, buf, st.st_size ); + if( sz == st.st_size ) + { + while( len > 1 && buf[len-1] == '/' ) --len; // trailing '/' + if( len <= linkname_l ) std::memcpy( header + linkname_o, buf, len ); + else { buf[len] = 0; extended.linkpath( buf ); + force_extended_name = true; } + } + delete[] buf; + } + if( sz != st.st_size ) + { + if( sz < 0 ) + show_file_error( filename, "Error reading symbolic link", errno ); + else + show_file_error( filename, "Wrong size reading symbolic link.\n" + "Please, send a bug report to the maintainers of your filesystem, " + "mentioning\n'wrong st_size of symbolic link'.\nSee " + "http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_stat.h.html" ); + set_error_status( 1 ); return false; + } + } + else if( S_ISCHR( mode ) || S_ISBLK( mode ) ) + { + typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev; + if( (unsigned)major( st.st_rdev ) >= 2 << 20 || + (unsigned)minor( st.st_rdev ) >= 2 << 20 ) + { show_file_error( filename, "devmajor or devminor is larger than 2_097_151." ); + set_error_status( 1 ); return false; } + print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_rdev ) ); + print_octal( header + devminor_o, devminor_l - 1, minor( st.st_rdev ) ); + } + else if( S_ISFIFO( mode ) ) typeflag = tf_fifo; + else { show_file_error( filename, "Unknown file type." ); + set_error_status( 2 ); return false; } + header[typeflag_o] = typeflag; + + if( uid == (long long)( (uid_t)uid ) ) // get name if uid is in range + { const struct passwd * const pw = getpwuid( uid ); + if( pw && pw->pw_name ) + std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 ); } + + if( gid == (long long)( (gid_t)gid ) ) // get name if gid is in range + { const struct group * const gr = getgrgid( gid ); + if( gr && gr->gr_name ) + std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 ); } + + file_size = ( typeflag == tf_regular && st.st_size > 0 && + st.st_size <= max_file_size ) ? st.st_size : 0; + if( file_size >= 1LL << 33 ) + { extended.file_size( file_size ); force_extended_name = true; } + else print_octal( header + size_o, size_l - 1, file_size ); + store_name( filename, extended, header, force_extended_name ); + print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) ); + return true; + } + + +bool block_is_full( const long long extended_size, + const unsigned long long file_size, + const unsigned long long target_size, + unsigned long long & partial_data_size ) + { + const unsigned long long member_size = // may overflow 'long long' + header_size + extended_size + round_up( file_size ); + if( partial_data_size >= target_size || + ( partial_data_size >= min_data_size && + partial_data_size + member_size / 2 > target_size ) ) + { partial_data_size = member_size; return true; } + partial_data_size += member_size; return false; + } + + +void set_error_status( const int retval ) + { + // prevent two threads from modifying the error_status at the same time + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + + xlock( &mutex ); + if( error_status < retval ) error_status = retval; + xunlock( &mutex ); + } + +int final_exit_status( int retval, const bool show_msg ) + { + if( retval == 0 && error_status ) + { if( show_msg ) + show_error( "Exiting with failure status due to previous errors." ); + retval = error_status; } + return retval; + } + +unsigned ustar_chksum( const Tar_header header ) + { + unsigned chksum = chksum_l * 0x20; // treat chksum field as spaces + for( int i = 0; i < chksum_o; ++i ) chksum += header[i]; + for( int i = chksum_o + chksum_l; i < header_size; ++i ) chksum += header[i]; + return chksum; + } + + +bool verify_ustar_chksum( const Tar_header header ) + { return ( verify_ustar_magic( header ) && + ustar_chksum( header ) == parse_octal( header + chksum_o, chksum_l ) ); } + + +bool has_lz_ext( const std::string & name ) + { + return ( name.size() > 3 && + name.compare( name.size() - 3, 3, ".lz" ) == 0 ) || + ( name.size() > 4 && + name.compare( name.size() - 4, 4, ".tlz" ) == 0 ); + } + + +int concatenate( const Cl_options & cl_opts ) + { + if( cl_opts.num_files <= 0 ) + { if( verbosity >= 1 ) show_error( "Nothing to concatenate." ); return 0; } + const bool to_stdout = cl_opts.archive_name.empty(); + archive_namep = to_stdout ? "(stdout)" : cl_opts.archive_name.c_str(); + const int outfd = + to_stdout ? STDOUT_FILENO : open_outstream( cl_opts.archive_name, false ); + if( outfd < 0 ) return 1; + if( !check_tty_out( archive_namep, outfd, to_stdout ) ) + { close( outfd ); return 1; } + if( !to_stdout && !archive_attrs.init( outfd ) ) + { show_file_error( archive_namep, "Can't stat", errno ); return 1; } + int compressed; // tri-state bool + if( to_stdout ) compressed = -1; // unknown + else + { + compressed = has_lz_ext( cl_opts.archive_name ); // default value + long long pos = check_compressed_appendable( outfd, true ); + if( pos > 0 ) compressed = true; + else if( pos < 0 ) + { + pos = check_uncompressed_appendable( outfd, true ); + if( pos > 0 ) compressed = false; + else if( pos == -2 ) { show_error( mem_msg ); close( outfd ); return 1; } + else if( pos < 0 ) + { show_file_error( archive_namep, compressed ? + "This does not look like an appendable tar.lz archive." : + "This does not look like an appendable tar archive." ); + close( outfd ); return 2; } + } + } + + int retval = 0; + bool eoa_pending = false; + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) // copy archives + { + if( !nonempty_arg( cl_opts.parser, i ) ) continue; // skip opts, empty names + const char * const filename = cl_opts.parser.argument( i ).c_str(); + if( Exclude::excluded( filename ) ) continue; // skip excluded files + const int infd = open_instream( filename ); + if( infd < 0 ) { retval = 1; break; } + struct stat st; + if( !to_stdout && fstat( infd, &st ) == 0 && archive_attrs.is_the_archive( st ) ) + { show_file_error( filename, "Archive can't contain itself; not concatenated." ); + close( infd ); continue; } + long long size; + if( compressed < 0 ) // not initialized yet + { + if( ( size = check_compressed_appendable( infd, false ) ) > 0 ) + compressed = true; + else if( ( size = check_uncompressed_appendable( infd, false ) ) > 0 ) + compressed = false; + else if( size != -2 ) { size = -1 ; compressed = has_lz_ext( filename ); } + } + else size = compressed ? check_compressed_appendable( infd, false ) : + check_uncompressed_appendable( infd, false ); + if( size == -2 ) + { show_error( mem_msg ); close( infd ); retval = 1; break; } + if( size < 0 ) + { show_file_error( filename, compressed ? + "Not an appendable tar.lz archive." : + "Not an appendable tar archive." ); + close( infd ); retval = 2; break; } + if( !copy_file( infd, outfd, size ) || close( infd ) != 0 ) + { show_file_error( filename, "Error copying archive", errno ); + eoa_pending = false; retval = 1; break; } + eoa_pending = true; + if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename ); + } + + if( eoa_pending && !write_eoa_records( outfd, compressed ) && retval == 0 ) + retval = 1; + if( close( outfd ) != 0 && retval == 0 ) + { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; } + return retval; + } + + +int encode( const Cl_options & cl_opts ) + { + if( !grbuf.size() ) { show_error( mem_msg ); return 1; } + const bool compressed = ( cl_opts.level >= 0 && cl_opts.level <= 9 ); + const bool to_stdout = cl_opts.archive_name.empty(); + archive_namep = to_stdout ? "(stdout)" : cl_opts.archive_name.c_str(); + gcl_opts = &cl_opts; + + if( !to_stdout && !compressed && has_lz_ext( cl_opts.archive_name ) ) + { show_file_error( archive_namep, + "Uncompressed mode incompatible with .lz extension." ); return 2; } + + const bool append = cl_opts.program_mode == m_append; + if( cl_opts.num_files <= 0 ) + { + if( !append && !to_stdout ) // create archive + { show_error( "Cowardly refusing to create an empty archive.", 0, true ); + return 1; } + else // create/append to stdout or append to archive + { if( verbosity >= 1 ) show_error( "Nothing to append." ); return 0; } + } + + if( to_stdout ) // create/append to stdout + goutfd = STDOUT_FILENO; + else // create/append to archive + if( ( goutfd = open_outstream( cl_opts.archive_name, !append ) ) < 0 ) + return 1; + if( !check_tty_out( archive_namep, goutfd, to_stdout ) ) + { close( goutfd ); return 1; } + if( append && !to_stdout ) + { + if( compressed && check_compressed_appendable( goutfd, true ) < 0 ) + { show_file_error( archive_namep, + "This does not look like an appendable tar.lz archive." ); + close( goutfd ); return 2; } + if( !compressed ) + { + const long long pos = check_uncompressed_appendable( goutfd, true ); + if( pos == -2 ) { show_error( mem_msg ); close( goutfd ); return 1; } + if( pos < 0 ) { show_file_error( archive_namep, + "This does not look like an appendable tar archive." ); + close( goutfd ); return 2; } + } + } + + if( !archive_attrs.init( goutfd ) ) + { show_file_error( archive_namep, "Can't stat", errno ); + close( goutfd ); return 1; } + + if( compressed ) + { + /* CWD is not per-thread; multi-threaded --create can't be used if a + -C option appears after a relative filename in the command line. */ + if( cl_opts.solidity != asolid && cl_opts.solidity != solid && + cl_opts.num_workers > 0 && + !option_C_after_relative_filename( cl_opts.parser ) ) + { + // show_file_error( archive_namep, "Multi-threaded --create" ); + return encode_lz( cl_opts, archive_namep, goutfd ); + } + encoder = LZ_compress_open( option_mapping[cl_opts.level].dictionary_size, + option_mapping[cl_opts.level].match_len_limit, LLONG_MAX ); + if( !encoder || LZ_compress_errno( encoder ) != LZ_ok ) + { + if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) + show_error( mem_msg2 ); + else + internal_error( "invalid argument to encoder." ); + close( goutfd ); return 1; + } + } + + int retval = 0; + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) // parse command line + { + const int code = cl_opts.parser.code( i ); + const std::string & arg = cl_opts.parser.argument( i ); + const char * filename = arg.c_str(); + if( code == 'C' && chdir( filename ) != 0 ) + { show_file_error( filename, chdir_msg, errno ); retval = 1; break; } + if( code ) continue; // skip options + if( cl_opts.parser.argument( i ).empty() ) continue; // skip empty names + std::string deslashed; // arg without trailing slashes + unsigned len = arg.size(); + while( len > 1 && arg[len-1] == '/' ) --len; + if( len < arg.size() ) + { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); } + if( Exclude::excluded( filename ) ) continue; // skip excluded files + struct stat st; + if( lstat( filename, &st ) != 0 ) // filename from command line + { show_file_error( filename, cant_stat, errno ); set_error_status( 1 ); } + else if( ( retval = nftw( filename, add_member, 16, + cl_opts.dereference ? 0 : FTW_PHYS ) ) != 0 ) + break; // write error + else if( encoder && cl_opts.solidity == dsolid && !archive_write( 0, 0 ) ) + { retval = 1; break; } + } + + if( retval == 0 ) // write End-Of-Archive records + { + enum { bufsize = 2 * header_size }; + uint8_t buf[bufsize]; + std::memset( buf, 0, bufsize ); + if( encoder && + ( cl_opts.solidity == asolid || + ( cl_opts.solidity == bsolid && partial_data_size ) ) && + !archive_write( 0, 0 ) ) retval = 1; // flush encoder + else if( !archive_write( buf, bufsize ) || + ( encoder && !archive_write( 0, 0 ) ) ) retval = 1; + } + if( encoder && LZ_compress_close( encoder ) < 0 ) + { show_error( "LZ_compress_close failed." ); retval = 1; } + if( close( goutfd ) != 0 && retval == 0 ) + { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; } + return final_exit_status( retval ); + } diff --git a/create.h b/create.h new file mode 100644 index 0000000..cc7c72d --- /dev/null +++ b/create.h @@ -0,0 +1,48 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +class Archive_attrs + { + struct stat ast; // archive attributes at time of init + bool initialized; + bool isreg; + +public: + Archive_attrs() : initialized( false ), isreg( false ) {} + bool init( const int fd ) + { + if( fstat( fd, &ast ) != 0 ) return false; + if( S_ISREG( ast.st_mode ) ) isreg = true; + initialized = true; + return true; + } + bool is_the_archive( const struct stat & st ) const + { return isreg && st.st_dev == ast.st_dev && st.st_ino == ast.st_ino; } + bool is_newer( const struct stat & st ) const + { return initialized && st.st_mtime > ast.st_mtime; } + bool is_newer( const char * const filename ) const + { + if( !initialized ) return false; + struct stat st; + return lstat( filename, &st ) != 0 || st.st_mtime > ast.st_mtime; + } + }; + +extern Archive_attrs archive_attrs; + +const char * const cant_stat = "Can't stat input file"; +const char * const eferec_msg = "Error formatting extended records."; diff --git a/create_lz.cc b/create_lz.cc new file mode 100644 index 0000000..1acaf23 --- /dev/null +++ b/create_lz.cc @@ -0,0 +1,600 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include // for lzlib.h +#include +#include +#include +#include + +#include "tarlz.h" +#include "arg_parser.h" +#include "create.h" + + +namespace { + +const Cl_options * gcl_opts = 0; // local vars needed by add_member_lz +enum { max_packet_size = 1 << 20 }; +class Packet_courier; +Packet_courier * courierp = 0; +unsigned long long partial_data_size = 0; // size of current block + + +class Slot_tally + { + const int num_slots; // total slots + int num_free; // remaining free slots + pthread_mutex_t mutex; + pthread_cond_t slot_av; // slot available + + Slot_tally( const Slot_tally & ); // declared as private + void operator=( const Slot_tally & ); // declared as private + +public: + explicit Slot_tally( const int slots ) + : num_slots( slots ), num_free( slots ) + { xinit_mutex( &mutex ); xinit_cond( &slot_av ); } + + ~Slot_tally() { xdestroy_cond( &slot_av ); xdestroy_mutex( &mutex ); } + + bool all_free() { return ( num_free == num_slots ); } + + void get_slot() // wait for a free slot + { + xlock( &mutex ); + while( num_free <= 0 ) xwait( &slot_av, &mutex ); + --num_free; + xunlock( &mutex ); + } + + void leave_slot() // return a slot to the tally + { + xlock( &mutex ); + if( ++num_free == 1 ) xsignal( &slot_av ); // num_free was 0 + xunlock( &mutex ); + } + }; + + +struct Ipacket // filename, file size and headers + { + const long long file_size; + const std::string filename; // filename.empty() means end of lzip member + const Extended * const extended; + const uint8_t * const header; + + Ipacket() : file_size( 0 ), extended( 0 ), header( 0 ) {} + Ipacket( const char * const name, const long long s, + const Extended * const ext, const uint8_t * const head ) + : file_size( s ), filename( name ), extended( ext ), header( head ) {} + }; + +struct Opacket // compressed data to be written to the archive + { + const uint8_t * const data; // data == 0 means end of lzip member + const int size; // number of bytes in data (if any) + + Opacket() : data( 0 ), size( 0 ) {} + Opacket( uint8_t * const d, const int s ) : data( d ), size( s ) {} + }; + + +class Packet_courier // moves packets around + { +public: + unsigned icheck_counter; + unsigned iwait_counter; + unsigned ocheck_counter; + unsigned owait_counter; +private: + int receive_worker_id; // worker queue currently receiving packets + int deliver_worker_id; // worker queue currently delivering packets + Slot_tally slot_tally; // limits the number of input packets + std::vector< std::queue< const Ipacket * > > ipacket_queues; + std::vector< std::queue< const Opacket * > > opacket_queues; + int num_working; // number of workers still running + const int num_workers; // number of workers + const unsigned out_slots; // max output packets per queue + pthread_mutex_t imutex; + pthread_cond_t iav_or_eof; // input packet available or grouper done + pthread_mutex_t omutex; + pthread_cond_t oav_or_exit; // output packet available or all workers exited + std::vector< pthread_cond_t > slot_av; // output slot available + bool eof; // grouper done + + Packet_courier( const Packet_courier & ); // declared as private + void operator=( const Packet_courier & ); // declared as private + +public: + Packet_courier( const int workers, const int in_slots, const int oslots ) + : icheck_counter( 0 ), iwait_counter( 0 ), + ocheck_counter( 0 ), owait_counter( 0 ), + receive_worker_id( 0 ), deliver_worker_id( 0 ), + slot_tally( in_slots ), ipacket_queues( workers ), + opacket_queues( workers ), num_working( workers ), + num_workers( workers ), out_slots( oslots ), slot_av( workers ), + eof( false ) + { + xinit_mutex( &imutex ); xinit_cond( &iav_or_eof ); + xinit_mutex( &omutex ); xinit_cond( &oav_or_exit ); + for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] ); + } + + ~Packet_courier() + { + for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] ); + xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex ); + xdestroy_cond( &iav_or_eof ); xdestroy_mutex( &imutex ); + } + + /* Receive an ipacket from grouper. + If filename.empty() (end of lzip member token), move to next queue. */ + void receive_packet( const Ipacket * const ipacket ) + { + if( !ipacket->filename.empty() ) + slot_tally.get_slot(); // wait for a free slot + xlock( &imutex ); + ipacket_queues[receive_worker_id].push( ipacket ); + if( ipacket->filename.empty() && ++receive_worker_id >= num_workers ) + receive_worker_id = 0; + xbroadcast( &iav_or_eof ); + xunlock( &imutex ); + } + + // distribute an ipacket to a worker + const Ipacket * distribute_packet( const int worker_id ) + { + const Ipacket * ipacket = 0; + xlock( &imutex ); + ++icheck_counter; + while( ipacket_queues[worker_id].empty() && !eof ) + { + ++iwait_counter; + xwait( &iav_or_eof, &imutex ); + } + if( !ipacket_queues[worker_id].empty() ) + { + ipacket = ipacket_queues[worker_id].front(); + ipacket_queues[worker_id].pop(); + } + xunlock( &imutex ); + if( ipacket ) + { if( !ipacket->filename.empty() ) slot_tally.leave_slot(); } + else + { + // notify muxer when last worker exits + xlock( &omutex ); + if( --num_working == 0 ) xsignal( &oav_or_exit ); + xunlock( &omutex ); + } + return ipacket; + } + + // collect an opacket from a worker + void collect_packet( const Opacket * const opacket, const int worker_id ) + { + xlock( &omutex ); + if( opacket->data ) + { + while( opacket_queues[worker_id].size() >= out_slots ) + xwait( &slot_av[worker_id], &omutex ); + } + opacket_queues[worker_id].push( opacket ); + if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit ); + xunlock( &omutex ); + } + + /* Deliver an opacket to muxer. + If opacket data == 0, move to next queue and wait again. */ + const Opacket * deliver_packet() + { + const Opacket * opacket = 0; + xlock( &omutex ); + ++ocheck_counter; + while( true ) + { + while( opacket_queues[deliver_worker_id].empty() && num_working > 0 ) + { + ++owait_counter; + xwait( &oav_or_exit, &omutex ); + } + if( opacket_queues[deliver_worker_id].empty() ) break; + opacket = opacket_queues[deliver_worker_id].front(); + opacket_queues[deliver_worker_id].pop(); + if( opacket_queues[deliver_worker_id].size() + 1 == out_slots ) + xsignal( &slot_av[deliver_worker_id] ); + if( opacket->data ) break; + if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; + delete opacket; opacket = 0; + } + xunlock( &omutex ); + return opacket; + } + + void finish() // grouper has no more packets to send + { + xlock( &imutex ); + eof = true; + xbroadcast( &iav_or_eof ); + xunlock( &imutex ); + } + + bool finished() // all packets delivered to muxer + { + if( !slot_tally.all_free() || !eof || num_working != 0 ) return false; + for( int i = 0; i < num_workers; ++i ) + if( !ipacket_queues[i].empty() ) return false; + for( int i = 0; i < num_workers; ++i ) + if( !opacket_queues[i].empty() ) return false; + return true; + } + }; + + +// send one ipacket with tar member metadata to courier +int add_member_lz( const char * const filename, const struct stat *, + const int flag, struct FTW * ) + { + if( Exclude::excluded( filename ) ) return 0; // skip excluded files + long long file_size; + // metadata for extended records + Extended * const extended = new( std::nothrow ) Extended; + uint8_t * const header = extended ? new( std::nothrow ) Tar_header : 0; + if( !header ) + { show_error( mem_msg ); if( extended ) delete extended; return 1; } + if( !fill_headers( filename, *extended, header, file_size, flag ) ) + { delete[] header; delete extended; return 0; } + print_removed_prefix( extended->removed_prefix ); + + if( gcl_opts->solidity == bsolid && + block_is_full( extended->full_size(), file_size, gcl_opts->data_size, + partial_data_size ) ) + courierp->receive_packet( new Ipacket ); // end of group + + courierp->receive_packet( new Ipacket( filename, file_size, extended, header ) ); + + if( gcl_opts->solidity == no_solid ) // one tar member per group + courierp->receive_packet( new Ipacket ); + if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename ); + return 0; + } + + +struct Grouper_arg + { + const Cl_options * cl_opts; + Packet_courier * courier; + }; + + +/* Package metadata of the files to be archived and pass them to the + courier for distribution to workers. +*/ +extern "C" void * grouper( void * arg ) + { + const Grouper_arg & tmp = *(const Grouper_arg *)arg; + const Cl_options & cl_opts = *tmp.cl_opts; + Packet_courier & courier = *tmp.courier; + + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) // parse command line + { + const int code = cl_opts.parser.code( i ); + const std::string & arg = cl_opts.parser.argument( i ); + const char * filename = arg.c_str(); + if( code == 'C' && chdir( filename ) != 0 ) + { show_file_error( filename, chdir_msg, errno ); exit_fail_mt(); } + if( code ) continue; // skip options + if( cl_opts.parser.argument( i ).empty() ) continue; // skip empty names + std::string deslashed; // arg without trailing slashes + unsigned len = arg.size(); + while( len > 1 && arg[len-1] == '/' ) --len; + if( len < arg.size() ) + { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); } + if( Exclude::excluded( filename ) ) continue; // skip excluded files + struct stat st; + if( lstat( filename, &st ) != 0 ) // filename from command line + { show_file_error( filename, cant_stat, errno ); set_error_status( 1 ); } + else if( nftw( filename, add_member_lz, 16, + cl_opts.dereference ? 0 : FTW_PHYS ) != 0 ) + exit_fail_mt(); // write error or OOM + else if( cl_opts.solidity == dsolid ) // end of group + courier.receive_packet( new Ipacket ); + } + + if( cl_opts.solidity == bsolid && partial_data_size ) // finish last block + { partial_data_size = 0; courierp->receive_packet( new Ipacket ); } + courier.finish(); // no more packets to send + return 0; + } + + +/* Writes ibuf to encoder. To minimize dictionary size, it does not read + from encoder until encoder's input buffer is full or finish is true. + Sends opacket to courier and allocates new obuf each time obuf is full. +*/ +void loop_encode( const uint8_t * const ibuf, const int isize, + uint8_t * & obuf, int & opos, Packet_courier & courier, + LZ_Encoder * const encoder, const int worker_id, + const bool finish = false ) + { + int ipos = 0; + if( opos < 0 || opos > max_packet_size ) + internal_error( "bad buffer index in loop_encode." ); + while( true ) + { + if( ipos < isize ) + { + const int wr = LZ_compress_write( encoder, ibuf + ipos, isize - ipos ); + if( wr < 0 ) internal_error( "library error (LZ_compress_write)." ); + ipos += wr; + } + if( ipos >= isize ) // ibuf is empty + { if( finish ) LZ_compress_finish( encoder ); else break; } + const int rd = + LZ_compress_read( encoder, obuf + opos, max_packet_size - opos ); + if( rd < 0 ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "LZ_compress_read error: %s\n", + LZ_strerror( LZ_compress_errno( encoder ) ) ); + exit_fail_mt(); + } + opos += rd; + // obuf is full or last opacket in lzip member + if( opos >= max_packet_size || LZ_compress_finished( encoder ) == 1 ) + { + if( opos > max_packet_size ) + internal_error( "opacket size exceeded in worker." ); + courier.collect_packet( new Opacket( obuf, opos ), worker_id ); + opos = 0; obuf = new( std::nothrow ) uint8_t[max_packet_size]; + if( !obuf ) { show_error( mem_msg2 ); exit_fail_mt(); } + if( LZ_compress_finished( encoder ) == 1 ) + { + if( LZ_compress_restart_member( encoder, LLONG_MAX ) >= 0 ) break; + show_error( "LZ_compress_restart_member failed." ); exit_fail_mt(); + } + } + } + if( ipos > isize ) internal_error( "ipacket size exceeded in worker." ); + if( ipos < isize ) internal_error( "input not fully consumed in worker." ); + } + + +struct Worker_arg + { + Packet_courier * courier; + int dictionary_size; + int match_len_limit; + int worker_id; + }; + + +/* Get ipackets from courier, compress headers and file data, and give the + opackets produced to courier. +*/ +extern "C" void * cworker( void * arg ) + { + const Worker_arg & tmp = *(const Worker_arg *)arg; + Packet_courier & courier = *tmp.courier; + const int dictionary_size = tmp.dictionary_size; + const int match_len_limit = tmp.match_len_limit; + const int worker_id = tmp.worker_id; + + LZ_Encoder * encoder = 0; + uint8_t * data = 0; + Resizable_buffer rbuf; // extended header + data + if( !rbuf.size() ) { show_error( mem_msg2 ); exit_fail_mt(); } + + int opos = 0; + bool flushed = true; // avoid producing empty lzip members + while( true ) + { + const Ipacket * const ipacket = courier.distribute_packet( worker_id ); + if( !ipacket ) break; // no more packets to process + if( ipacket->filename.empty() ) // end of group + { + if( !flushed ) // this lzip member is not empty + loop_encode( 0, 0, data, opos, courier, encoder, worker_id, true ); + courier.collect_packet( new Opacket, worker_id ); // end of member token + flushed = true; delete ipacket; continue; + } + + const char * const filename = ipacket->filename.c_str(); + const int infd = + ipacket->file_size ? open_instream( filename ) : -1; + if( ipacket->file_size && infd < 0 ) // can't read file data + { delete[] ipacket->header; delete ipacket->extended; delete ipacket; + set_error_status( 1 ); continue; } // skip file + + flushed = false; + if( !encoder ) // init encoder just before using it + { + data = new( std::nothrow ) uint8_t[max_packet_size]; + encoder = LZ_compress_open( dictionary_size, match_len_limit, LLONG_MAX ); + if( !data || !encoder || LZ_compress_errno( encoder ) != LZ_ok ) + { + if( !data || !encoder || LZ_compress_errno( encoder ) == LZ_mem_error ) + show_error( mem_msg2 ); + else + internal_error( "invalid argument to encoder." ); + exit_fail_mt(); + } + } + + if( !ipacket->extended->empty() ) // compress extended block + { + const long long ebsize = ipacket->extended->format_block( rbuf ); + if( ebsize < 0 ) + { show_error( ( ebsize == -2 ) ? mem_msg2 : eferec_msg ); exit_fail_mt(); } + /* Limit the size of the extended block to INT_MAX - 1 so that it can + be fed to lzlib as one buffer. */ + if( ebsize >= INT_MAX ) + { show_error( "Extended records size >= INT_MAX." ); exit_fail_mt(); } + loop_encode( rbuf.u8(), ebsize, data, opos, courier, encoder, worker_id ); + } + // compress ustar header + loop_encode( ipacket->header, header_size, data, opos, courier, + encoder, worker_id ); + delete[] ipacket->header; delete ipacket->extended; + + if( ipacket->file_size ) + { + const long long bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = ipacket->file_size; + while( rest > 0 ) + { + int size = std::min( rest, bufsize ); + const int rd = readblock( infd, buf, size ); + rest -= rd; + if( rd != size ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "File '%s' ends unexpectedly at pos %llu\n", + filename, ipacket->file_size - rest ); + close( infd ); exit_fail_mt(); + } + if( rest == 0 ) // last read + { + const int rem = ipacket->file_size % header_size; + if( rem > 0 ) + { const int padding = header_size - rem; + std::memset( buf + size, 0, padding ); size += padding; } + } + // compress size bytes of file + loop_encode( buf, size, data, opos, courier, encoder, worker_id ); + } + if( close( infd ) != 0 ) + { show_file_error( filename, eclosf_msg, errno ); exit_fail_mt(); } + } + if( gcl_opts->warn_newer && archive_attrs.is_newer( filename ) ) + { show_file_error( filename, "File is newer than the archive." ); + set_error_status( 1 ); } + delete ipacket; + } + if( data ) delete[] data; + if( encoder && LZ_compress_close( encoder ) < 0 ) + { show_error( "LZ_compress_close failed." ); exit_fail_mt(); } + return 0; + } + + +/* Get from courier the processed and sorted packets, and write + their contents to the output archive. +*/ +void muxer( Packet_courier & courier, const int outfd ) + { + while( true ) + { + const Opacket * const opacket = courier.deliver_packet(); + if( !opacket ) break; // queue is empty. all workers exited + + if( !writeblock_wrapper( outfd, opacket->data, opacket->size ) ) + exit_fail_mt(); + delete[] opacket->data; + delete opacket; + } + } + +} // end namespace + + +// init the courier, then start the grouper and the workers and call the muxer +int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, + const int outfd ) + { + const int in_slots = 65536; // max small files (<=512B) in 64 MiB + const int num_workers = cl_opts.num_workers; + const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ? + num_workers * in_slots : INT_MAX; + const int dictionary_size = option_mapping[cl_opts.level].dictionary_size; + const int match_len_limit = option_mapping[cl_opts.level].match_len_limit; + gcl_opts = &cl_opts; + + /* If an error happens after any threads have been started, exit must be + called before courier goes out of scope. */ + Packet_courier courier( num_workers, total_in_slots, cl_opts.out_slots ); + courierp = &courier; // needed by add_member_lz + + Grouper_arg grouper_arg; + grouper_arg.cl_opts = &cl_opts; + grouper_arg.courier = &courier; + + pthread_t grouper_thread; + int errcode = pthread_create( &grouper_thread, 0, grouper, &grouper_arg ); + if( errcode ) + { show_error( "Can't create grouper thread", errcode ); return 1; } + + Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers]; + pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers]; + if( !worker_args || !worker_threads ) + { show_error( mem_msg ); exit_fail_mt(); } + for( int i = 0; i < num_workers; ++i ) + { + worker_args[i].courier = &courier; + worker_args[i].dictionary_size = dictionary_size; + worker_args[i].match_len_limit = match_len_limit; + worker_args[i].worker_id = i; + errcode = pthread_create( &worker_threads[i], 0, cworker, &worker_args[i] ); + if( errcode ) + { show_error( "Can't create worker threads", errcode ); exit_fail_mt(); } + } + + muxer( courier, outfd ); + + for( int i = num_workers - 1; i >= 0; --i ) + { + errcode = pthread_join( worker_threads[i], 0 ); + if( errcode ) + { show_error( "Can't join worker threads", errcode ); exit_fail_mt(); } + } + delete[] worker_threads; + delete[] worker_args; + + errcode = pthread_join( grouper_thread, 0 ); + if( errcode ) + { show_error( "Can't join grouper thread", errcode ); exit_fail_mt(); } + + // write End-Of-Archive records + int retval = !write_eoa_records( outfd, true ); + + if( close( outfd ) != 0 && retval == 0 ) + { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; } + + if( cl_opts.debug_level & 1 ) + std::fprintf( stderr, + "any worker tried to consume from grouper %8u times\n" + "any worker had to wait %8u times\n" + "muxer tried to consume from workers %8u times\n" + "muxer had to wait %8u times\n", + courier.icheck_counter, + courier.iwait_counter, + courier.ocheck_counter, + courier.owait_counter ); + + if( !courier.finished() ) internal_error( "courier not finished." ); + return final_exit_status( retval ); + } diff --git a/decode.cc b/decode.cc new file mode 100644 index 0000000..a45a1fd --- /dev/null +++ b/decode.cc @@ -0,0 +1,492 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include // for lzlib.h +#include +#include +#include +#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ + !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ +#include // for major, minor, makedev +#endif +#include + +#include "tarlz.h" +#include "arg_parser.h" +#include "lzip_index.h" +#include "archive_reader.h" +#include "decode.h" + + +namespace { + +Resizable_buffer grbuf; + +bool skip_warn( const bool reset = false ) // avoid duplicate warnings + { + static bool skipping = false; + + if( reset ) skipping = false; + else if( !skipping ) + { skipping = true; show_error( "Skipping to next header." ); return true; } + return false; + } + + +void read_error( const Archive_reader & ar ) + { + show_file_error( ar.ad.namep, ar.e_msg(), ar.e_code() ); + if( ar.e_skip() ) skip_warn(); + } + + +int skip_member( Archive_reader & ar, const Extended & extended, + const Typeflag typeflag ) + { + if( data_may_follow( typeflag ) ) + { const int ret = ar.skip_member( extended ); + if( ret != 0 ) { read_error( ar ); if( ar.fatal() ) return ret; } } + return 0; + } + + +int compare_member( const Cl_options & cl_opts, Archive_reader & ar, + const Extended & extended, const Tar_header header ) + { + if( !show_member_name( extended, header, 1, grbuf ) ) return 1; + std::string estr, ostr; + const bool stat_differs = + !compare_file_type( estr, ostr, cl_opts, extended, header ); + if( estr.size() ) std::fputs( estr.c_str(), stderr ); + if( ostr.size() ) { std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); } + if( extended.file_size() <= 0 ) return 0; + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || stat_differs ) + return skip_member( ar, extended, typeflag ); + // else compare file contents + const char * const filename = extended.path().c_str(); + const int infd2 = open_instream( filename ); + if( infd2 < 0 ) + { set_error_status( 1 ); return skip_member( ar, extended, typeflag ); } + int retval = compare_file_contents( estr, ostr, ar, extended.file_size(), + filename, infd2 ); + if( retval ) { read_error( ar ); if( !ar.fatal() ) retval = 0; } + else { if( estr.size() ) std::fputs( estr.c_str(), stderr ); + if( ostr.size() ) + { std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); } } + return retval; + } + + +int list_member( Archive_reader & ar, + const Extended & extended, const Tar_header header ) + { + if( !show_member_name( extended, header, 0, grbuf ) ) return 1; + return skip_member( ar, extended, (Typeflag)header[typeflag_o] ); + } + + +int extract_member( const Cl_options & cl_opts, Archive_reader & ar, + const Extended & extended, const Tar_header header ) + { + const char * const filename = extended.path().c_str(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( contains_dotdot( filename ) ) + { + show_file_error( filename, dotdot_msg ); + return skip_member( ar, extended, typeflag ); + } + mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits + if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask(); + int outfd = -1; + + if( !show_member_name( extended, header, 1, grbuf ) ) return 1; + // remove file (or empty dir) before extraction to prevent following links + std::remove( filename ); + if( !make_path( filename ) ) + { + show_file_error( filename, intdir_msg, errno ); + set_error_status( 1 ); + return skip_member( ar, extended, typeflag ); + } + + switch( typeflag ) + { + case tf_regular: + case tf_hiperf: + outfd = open_outstream( filename ); + if( outfd < 0 ) + { set_error_status( 1 ); return skip_member( ar, extended, typeflag ); } + break; + case tf_link: + case tf_symlink: + { + const char * const linkname = extended.linkpath().c_str(); + const bool hard = typeflag == tf_link; + if( ( hard && link( linkname, filename ) != 0 ) || + ( !hard && symlink( linkname, filename ) != 0 ) ) + { + print_error( errno, cantln_msg, hard ? "" : "sym", linkname, filename ); + set_error_status( 1 ); + } + } break; + case tf_directory: + if( mkdir( filename, mode ) != 0 && errno != EEXIST ) + { + show_file_error( filename, mkdir_msg, errno ); + set_error_status( 1 ); + } + break; + case tf_chardev: + case tf_blockdev: + { + const unsigned dev = + makedev( parse_octal( header + devmajor_o, devmajor_l ), + parse_octal( header + devminor_o, devminor_l ) ); + const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode; + if( mknod( filename, dmode, dev ) != 0 ) + { + show_file_error( filename, mknod_msg, errno ); + set_error_status( 1 ); + } + break; + } + case tf_fifo: + if( mkfifo( filename, mode ) != 0 ) + { + show_file_error( filename, mkfifo_msg, errno ); + set_error_status( 1 ); + } + break; + default: + print_error( 0, uftype_msg, filename, typeflag ); + set_error_status( 2 ); + return skip_member( ar, extended, typeflag ); + } + + const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); + errno = 0; + if( !islink && + ( !uid_gid_in_range( extended.get_uid(), extended.get_gid() ) || + chown( filename, extended.get_uid(), extended.get_gid() ) != 0 ) ) + { + if( outfd >= 0 ) mode &= ~( S_ISUID | S_ISGID | S_ISVTX ); + // chown will in many cases return with EPERM, which can be safely ignored. + if( errno != EPERM && errno != EINVAL ) + { show_file_error( filename, chown_msg, errno ); set_error_status( 1 ); } + } + + if( outfd >= 0 ) fchmod( outfd, mode ); // ignore errors + + if( data_may_follow( typeflag ) ) + { + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = extended.file_size(); + const int rem = rest % header_size; + const int padding = rem ? header_size - rem : 0; + while( rest > 0 ) + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; + const int ret = ar.read( buf, rsize ); + if( ret != 0 ) + { + read_error( ar ); + if( outfd >= 0 ) + { + if( cl_opts.keep_damaged ) + { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) ); + close( outfd ); } + else { close( outfd ); std::remove( filename ); } + } + if( ar.fatal() ) return ret; else return 0; + } + const int wsize = ( rest >= bufsize ) ? bufsize : rest; + if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) + { show_file_error( filename, werr_msg, errno ); return 1; } + rest -= wsize; + } + } + if( outfd >= 0 && close( outfd ) != 0 ) + { show_file_error( filename, eclosf_msg, errno ); return 1; } + if( !islink ) + { + struct utimbuf t; + t.actime = extended.atime().sec(); + t.modtime = extended.mtime().sec(); + utime( filename, &t ); // ignore errors + } + return 0; + } + + +void format_file_diff( std::string & ostr, const char * const filename, + const char * const msg ) + { if( verbosity >= 0 ) + { ostr += filename; ostr += ": "; ostr += msg; ostr += '\n'; } } + +} // end namespace + + +bool compare_file_type( std::string & estr, std::string & ostr, + const Cl_options & cl_opts, + const Extended & extended, const Tar_header header ) + { + const char * const filename = extended.path().c_str(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + struct stat st; + bool diff = false, size_differs = false, type_differs = true; + if( hstat( filename, &st, cl_opts.dereference ) != 0 ) + format_file_error( estr, filename, "warning: Can't stat", errno ); + else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && + !S_ISREG( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a regular file" ); + else if( typeflag == tf_symlink && !S_ISLNK( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a symlink" ); + else if( typeflag == tf_chardev && !S_ISCHR( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a character device" ); + else if( typeflag == tf_blockdev && !S_ISBLK( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a block device" ); + else if( typeflag == tf_directory && !S_ISDIR( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a directory" ); + else if( typeflag == tf_fifo && !S_ISFIFO( st.st_mode ) ) + format_file_diff( ostr, filename, "Is not a FIFO" ); + else + { + type_differs = false; + if( typeflag != tf_symlink ) + { + const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits + if( mode != ( st.st_mode & ( S_ISUID | S_ISGID | S_ISVTX | + S_IRWXU | S_IRWXG | S_IRWXO ) ) ) + { format_file_diff( ostr, filename, "Mode differs" ); diff = true; } + } + if( !cl_opts.ignore_ids ) + { + if( extended.get_uid() != (long long)st.st_uid ) + { format_file_diff( ostr, filename, "Uid differs" ); diff = true; } + if( extended.get_gid() != (long long)st.st_gid ) + { format_file_diff( ostr, filename, "Gid differs" ); diff = true; } + } + if( typeflag != tf_symlink ) + { + if( typeflag != tf_directory && + extended.mtime().sec() != (long long)st.st_mtime ) + { + if( (time_t)extended.mtime().sec() == st.st_mtime ) + { if( !cl_opts.ignore_overflow ) { diff = true; + format_file_diff( ostr, filename, "Mod time overflow" ); } } + else { diff = true; + format_file_diff( ostr, filename, "Mod time differs" ); } + } + if( ( typeflag == tf_regular || typeflag == tf_hiperf ) && + extended.file_size() != st.st_size ) // don't compare contents + { format_file_diff( ostr, filename, "Size differs" ); size_differs = true; } + if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) && + ( parse_octal( header + devmajor_o, devmajor_l ) != + (unsigned)major( st.st_rdev ) || + parse_octal( header + devminor_o, devminor_l ) != + (unsigned)minor( st.st_rdev ) ) ) + { format_file_diff( ostr, filename, "Device number differs" ); diff = true; } + } + else + { + char * const buf = new char[st.st_size+1]; + long len = readlink( filename, buf, st.st_size ); + bool e = ( len != st.st_size ); + if( !e ) + { + while( len > 1 && buf[len-1] == '/' ) --len; // trailing '/' + buf[len] = 0; + if( extended.linkpath() != buf ) e = true; + } + delete[] buf; + if( e ) { format_file_diff( ostr, filename, "Symlink differs" ); diff = true; } + } + } + if( diff || size_differs || type_differs ) set_error_status( 1 ); + return !( size_differs || type_differs ); + } + + +bool compare_file_contents( std::string & estr, std::string & ostr, + Archive_reader_base & ar, const long long file_size, + const char * const filename, const int infd2 ) + { + long long rest = file_size; + const int rem = rest % header_size; + const int padding = rem ? header_size - rem : 0; + const int bufsize = 32 * header_size; + uint8_t buf1[bufsize]; + uint8_t buf2[bufsize]; + int retval = 0; + bool diff = false; + estr.clear(); ostr.clear(); + while( rest > 0 ) + { + const int rsize1 = ( rest >= bufsize ) ? bufsize : rest + padding; + const int rsize2 = ( rest >= bufsize ) ? bufsize : rest; + if( ( retval = ar.read( buf1, rsize1 ) ) != 0 ) { diff = true; break; } + if( !diff ) + { + const int rd = readblock( infd2, buf2, rsize2 ); + if( rd != rsize2 ) + { + if( errno ) format_file_error( estr, filename, "Read error", errno ); + else format_file_diff( ostr, filename, "EOF found in file" ); + diff = true; + } + else + { + int i = 0; while( i < rsize2 && buf1[i] == buf2[i] ) ++i; + if( i < rsize2 ) + { format_file_diff( ostr, filename, "Contents differ" ); diff = true; } + } + } + if( rest < bufsize ) break; + rest -= rsize1; + } + close( infd2 ); + if( diff ) set_error_status( 1 ); + return retval; + } + + +int decode( const Cl_options & cl_opts ) + { + if( !grbuf.size() ) { show_error( mem_msg ); return 1; } + // open archive before changing working directory + const Archive_descriptor ad( cl_opts.archive_name ); + if( ad.infd < 0 ) return 1; + + // Execute -C options and mark filenames to be compared, extracted or listed. + // name_pending is of type char instead of bool to allow concurrent update. + std::vector< char > name_pending( cl_opts.parser.arguments(), false ); + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + { + const int code = cl_opts.parser.code( i ); + if( code == 'C' && cl_opts.program_mode != m_list ) + { + const char * const dir = cl_opts.parser.argument( i ).c_str(); + if( chdir( dir ) != 0 ) + { show_file_error( dir, chdir_msg, errno ); return 1; } + } + if( !code && cl_opts.parser.argument( i ).size() && + !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) ) + name_pending[i] = true; + } + + // multi-threaded --list is faster even with 1 thread and 1 file in archive + // but multi-threaded --diff and --extract probably need at least 2 of each + if( ( cl_opts.program_mode == m_diff || cl_opts.program_mode == m_list || + cl_opts.program_mode == m_extract ) && cl_opts.num_workers > 0 && + ad.indexed && ad.lzip_index.members() >= 2 ) // one file + EOA + { + // show_file_error( ad.namep, "Is compressed seekable" ); + return decode_lz( cl_opts, ad, name_pending ); + } + + Archive_reader ar( ad ); // serial reader + Extended extended; // metadata from extended records + int retval = 0; + bool prev_extended = false; // prev header was extended + while( true ) // process one tar header per iteration + { + Tar_header header; + const int ret = ar.read( header, header_size ); + if( ret != 0 ) { read_error( ar ); if( ar.fatal() ) { retval = ret; break; } } + if( ret != 0 || !verify_ustar_chksum( header ) ) // error or EOA + { + if( ret == 0 && block_is_zero( header, header_size ) ) // EOA + { + if( !prev_extended || cl_opts.permissive ) break; + show_file_error( ad.namep, fv_msg1 ); + retval = 2; break; + } + if( skip_warn() && verbosity >= 2 ) + std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) ); + set_error_status( 2 ); continue; + } + skip_warn( true ); // reset warning + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_global ) + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg2 ); retval = 2; break; } + Extended dummy; // global headers are parsed and ignored + const int ret = ar.parse_records( dummy, header, grbuf, gblrec_msg, true ); + if( ret != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); + if( ar.fatal() ) { retval = ret; break; } + skip_warn(); set_error_status( ret ); } + continue; + } + if( typeflag == tf_extended ) + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg3 ); retval = 2; break; } + const int ret = ar.parse_records( extended, header, grbuf, extrec_msg, + cl_opts.permissive ); + if( ret != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); + if( ar.fatal() ) { retval = ret; break; } + skip_warn(); extended.reset(); set_error_status( ret ); } + else if( !extended.crc_present() && cl_opts.missing_crc ) + { show_file_error( ad.namep, miscrc_msg ); retval = 2; break; } + prev_extended = true; continue; + } + prev_extended = false; + + extended.fill_from_ustar( header ); // copy metadata from header + + // members without name are skipped except when listing + if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) + retval = skip_member( ar, extended, typeflag ); + else + { + print_removed_prefix( extended.removed_prefix ); + if( cl_opts.program_mode == m_list ) + retval = list_member( ar, extended, header ); + else if( extended.path().empty() ) + retval = skip_member( ar, extended, typeflag ); + else if( cl_opts.program_mode == m_diff ) + retval = compare_member( cl_opts, ar, extended, header ); + else retval = extract_member( cl_opts, ar, extended, header ); + } + extended.reset(); + if( retval ) + { show_error( "Error is not recoverable: exiting now." ); break; } + } + + if( close( ad.infd ) != 0 && retval == 0 ) + { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; } + + if( retval == 0 ) + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] ) + { show_file_error( cl_opts.parser.argument( i ).c_str(), nfound_msg ); + retval = 1; } + return final_exit_status( retval, cl_opts.program_mode != m_diff ); + } diff --git a/decode.h b/decode.h new file mode 100644 index 0000000..45143fd --- /dev/null +++ b/decode.h @@ -0,0 +1,32 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +inline bool data_may_follow( const Typeflag typeflag ) + { return typeflag <= 0 || typeflag >= 7; } + +inline bool uid_gid_in_range( const long long uid, const long long gid ) + { return uid == (long long)( (uid_t)uid ) && + gid == (long long)( (gid_t)gid ); } + +const char * const dotdot_msg = "Contains a '..' component, skipping."; +const char * const intdir_msg = "Failed to create intermediate directory"; +const char * const cantln_msg = "Can't %slink '%s' to '%s'"; +const char * const mkdir_msg = "Can't create directory"; +const char * const mknod_msg = "Can't create device node"; +const char * const mkfifo_msg = "Can't create FIFO file"; +const char * const uftype_msg = "%s: Unknown file type '%c', skipping."; +const char * const chown_msg = "Can't change file owner"; diff --git a/decode_lz.cc b/decode_lz.cc new file mode 100644 index 0000000..8780eab --- /dev/null +++ b/decode_lz.cc @@ -0,0 +1,763 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include // for lzlib.h +#include +#include +#include +#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \ + !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__ +#include // for major, minor, makedev +#endif +#include + +#include "tarlz.h" +#include "arg_parser.h" +#include "lzip_index.h" +#include "archive_reader.h" +#include "decode.h" + +/* When a problem is detected by any worker: + - the worker requests mastership and returns. + - the courier discards new packets received or collected. + - the other workers return. + - the muxer drains the queue and returns. */ + +namespace { + +const char * const other_msg = "Other worker found an error."; + +/* line is preformatted and newline terminated except for prefix, error. + ok with an empty line is a no-op. */ +struct Packet // member name and metadata or error message + { + enum Status { ok, member_done, diag, prefix, error1, error2 }; + + long member_id; // lzip member containing the header of this tar member + std::string line; // member name and metadata ready to print, if any + Status status; // diagnostics and errors go to stderr + int errcode; // for error + Packet( const long i, const char * const msg, const Status s, const int e ) + : member_id( i ), line( msg ), status( s ), errcode( e ) {} + }; + + +class Packet_courier // moves packets around + { +public: + unsigned ocheck_counter; + unsigned owait_counter; +private: + long error_member_id; // first lzip member with error/misalign/eoa/eof + int deliver_worker_id; // worker queue currently delivering packets + int master_worker_id; // worker in charge if error/misalign/eoa/eof + std::vector< std::queue< const Packet * > > opacket_queues; + int num_working; // number of workers still running + const int num_workers; // number of workers + const unsigned out_slots; // max output packets per queue + pthread_mutex_t omutex; + pthread_cond_t oav_or_exit; // output packet available or all workers exited + std::vector< pthread_cond_t > slot_av; // output slot available + pthread_cond_t check_master; + bool eoa_found_; // EOA blocks found + + Packet_courier( const Packet_courier & ); // declared as private + void operator=( const Packet_courier & ); // declared as private + +public: + Packet_courier( const int workers, const int slots ) + : ocheck_counter( 0 ), owait_counter( 0 ), + error_member_id( -1 ), deliver_worker_id( 0 ), master_worker_id( -1 ), + opacket_queues( workers ), num_working( workers ), + num_workers( workers ), out_slots( slots ), slot_av( workers ), + eoa_found_( false ) + { + xinit_mutex( &omutex ); xinit_cond( &oav_or_exit ); + for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] ); + xinit_cond( &check_master ); + } + + ~Packet_courier() + { + xdestroy_cond( &check_master ); + for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] ); + xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex ); + } + + bool eoa_found() const { return eoa_found_; } + void report_eoa() { eoa_found_ = true; } + + bool mastership_granted() const { return master_worker_id >= 0; } + + bool request_mastership( const long member_id, const int worker_id ) + { + xlock( &omutex ); + if( mastership_granted() ) // already granted + { xunlock( &omutex ); return ( master_worker_id == worker_id ); } + if( error_member_id < 0 || error_member_id > member_id ) + error_member_id = member_id; + while( !mastership_granted() && ( worker_id != deliver_worker_id || + !opacket_queues[deliver_worker_id].empty() ) ) + xwait( &check_master, &omutex ); + if( !mastership_granted() && worker_id == deliver_worker_id && + opacket_queues[deliver_worker_id].empty() ) + { + master_worker_id = worker_id; // grant mastership + for( int i = 0; i < num_workers; ++i ) // delete all packets + while( !opacket_queues[i].empty() ) + opacket_queues[i].pop(); + xbroadcast( &check_master ); + xunlock( &omutex ); + return true; + } + xunlock( &omutex ); + return false; // mastership granted to another worker + } + + void worker_finished() + { + // notify muxer when last worker exits + xlock( &omutex ); + if( --num_working == 0 ) xsignal( &oav_or_exit ); + xunlock( &omutex ); + } + + /* Collect a packet from a worker. + If a packet is rejected, the worker must terminate. */ + bool collect_packet( const long member_id, const int worker_id, + const char * const msg, const Packet::Status status, + const int errcode = 0 ) + { + const Packet * const opacket = new Packet( member_id, msg, status, errcode ); + xlock( &omutex ); + if( ( mastership_granted() && master_worker_id != worker_id ) || + ( error_member_id >= 0 && error_member_id < opacket->member_id ) ) + { xunlock( &omutex ); delete opacket; return false; } // reject packet + while( opacket_queues[worker_id].size() >= out_slots ) + xwait( &slot_av[worker_id], &omutex ); + opacket_queues[worker_id].push( opacket ); + if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit ); + xunlock( &omutex ); + return true; + } + + /* Deliver a packet to muxer. + If packet.status == Packet::member_done, move to next queue. + If packet.line.empty(), wait again (empty lzip member). */ + const Packet * deliver_packet() + { + const Packet * opacket = 0; + xlock( &omutex ); + ++ocheck_counter; + while( true ) + { + while( opacket_queues[deliver_worker_id].empty() && num_working > 0 ) + { + ++owait_counter; + if( !mastership_granted() && error_member_id >= 0 ) + xbroadcast( &check_master ); // mastership requested not yet granted + xwait( &oav_or_exit, &omutex ); + } + if( opacket_queues[deliver_worker_id].empty() ) break; + opacket = opacket_queues[deliver_worker_id].front(); + opacket_queues[deliver_worker_id].pop(); + if( opacket_queues[deliver_worker_id].size() + 1 == out_slots ) + xsignal( &slot_av[deliver_worker_id] ); + if( opacket->status == Packet::member_done && !mastership_granted() ) + { if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; } + if( !opacket->line.empty() ) break; + delete opacket; opacket = 0; + } + xunlock( &omutex ); + return opacket; + } + + bool finished() // all packets delivered to muxer + { + if( num_working != 0 ) return false; + for( int i = 0; i < num_workers; ++i ) + if( !opacket_queues[i].empty() ) return false; + return true; + } + }; + + +// prevent two threads from extracting the same file at the same time +class Name_monitor + { + std::vector< unsigned > crc_vector; + std::vector< std::string > name_vector; + pthread_mutex_t mutex; + +public: + Name_monitor( const int num_workers ) + : crc_vector( num_workers ), name_vector( num_workers ) + { if( num_workers > 0 ) xinit_mutex( &mutex ); } + + bool reserve_name( const unsigned worker_id, const std::string & filename ) + { + // compare the CRCs of the names, verify collisions comparing the names + const unsigned crc = + crc32c.compute_crc( (const uint8_t *)filename.c_str(), filename.size() ); + xlock( &mutex ); + for( unsigned i = 0; i < crc_vector.size(); ++i ) + if( crc_vector[i] == crc && crc != 0 && i != worker_id && + name_vector[i] == filename ) + { xunlock( &mutex ); return false; } // filename already reserved + crc_vector[worker_id] = crc; name_vector[worker_id] = filename; + xunlock( &mutex ); + return true; + } + }; + + +struct Trival // triple result value + { + const char * msg; + int errcode; + int retval; + explicit Trival( const char * const s = 0, const int e = 0, const int r = 0 ) + : msg( s ), errcode( e ), retval( r ) {} + }; + + +Trival skip_member_lz( Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const long member_id, + const int worker_id, const Typeflag typeflag ) + { + if( data_may_follow( typeflag ) ) + { const int ret = ar.skip_member( extended ); + if( ret != 0 ) return Trival( ar.e_msg(), ar.e_code(), ret ); } + if( ar.at_member_end() && + !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) + return Trival( other_msg, 0, 1); + return Trival(); + } + + +Trival compare_member_lz( const Cl_options & cl_opts, + Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const long member_id, + const int worker_id ) + { + if( verbosity < 1 ) rbuf()[0] = 0; + else if( !format_member_name( extended, header, rbuf, verbosity > 1 ) ) + return Trival( mem_msg, 0, 1 ); + std::string estr, ostr; + const bool stat_differs = + !compare_file_type( estr, ostr, cl_opts, extended, header ); + if( ( rbuf()[0] && !courier.collect_packet( member_id, worker_id, rbuf(), + Packet::ok ) ) || + ( estr.size() && !courier.collect_packet( member_id, worker_id, + estr.c_str(), Packet::diag ) ) || + ( ostr.size() && !courier.collect_packet( member_id, worker_id, + ostr.c_str(), Packet::ok ) ) || + ( extended.file_size() <= 0 && ar.at_member_end() && + !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) ) + return Trival( other_msg, 0, 1 ); + if( extended.file_size() <= 0 ) return Trival(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || stat_differs ) + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); + // else compare file contents + const char * const filename = extended.path().c_str(); + const int infd2 = open_instream( filename ); + if( infd2 < 0 ) { set_error_status( 1 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); } + const int ret = compare_file_contents( estr, ostr, ar, extended.file_size(), + filename, infd2 ); + if( ret != 0 ) return Trival( ar.e_msg(), ar.e_code(), ret ); + if( ( estr.size() && !courier.collect_packet( member_id, worker_id, + estr.c_str(), Packet::diag ) ) || + ( ostr.size() && !courier.collect_packet( member_id, worker_id, + ostr.c_str(), Packet::ok ) ) || + ( ar.at_member_end() && + !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) ) + return Trival( other_msg, 0, 1 ); + return Trival(); + } + + +Trival list_member_lz( Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const long member_id, + const int worker_id ) + { + if( verbosity < 0 ) rbuf()[0] = 0; + else if( !format_member_name( extended, header, rbuf, verbosity > 0 ) ) + return Trival( mem_msg, 0, 1 ); + const int ret = data_may_follow( (Typeflag)header[typeflag_o] ) ? + ar.skip_member( extended ) : 0; // print name even on read error + if( !courier.collect_packet( member_id, worker_id, rbuf(), + ar.at_member_end() ? Packet::member_done : Packet::ok ) ) + return Trival( other_msg, 0, 1 ); + if( ret != 0 ) return Trival( ar.e_msg(), ar.e_code(), ret ); + return Trival(); + } + + +Trival extract_member_lz( const Cl_options & cl_opts, + Archive_reader_i & ar, Packet_courier & courier, + const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const long member_id, + const int worker_id, Name_monitor & name_monitor ) + { + const char * const filename = extended.path().c_str(); + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( contains_dotdot( filename ) ) + { + if( format_file_error( rbuf, filename, dotdot_msg ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); + } + // skip member if another copy is already being extracted by another thread + if( !name_monitor.reserve_name( worker_id, extended.path() ) ) + { + if( verbosity >= 3 && format_file_error( rbuf, filename, + "Is being extracted by another thread, skipping." ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); + } + mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits + if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask(); + int outfd = -1; + + if( verbosity >= 1 ) + { + if( !format_member_name( extended, header, rbuf, verbosity > 1 ) ) + return Trival( mem_msg, 0, 1 ); + if( !courier.collect_packet( member_id, worker_id, rbuf(), Packet::ok ) ) + return Trival( other_msg, 0, 1 ); + } + /* Remove file before extraction to prevent following links. + Don't remove an empty dir because other thread may need it. */ + if( typeflag != tf_directory ) std::remove( filename ); + if( !make_path( filename ) ) + { + if( format_file_error( rbuf, filename, intdir_msg, errno ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); + } + + switch( typeflag ) + { + case tf_regular: + case tf_hiperf: + outfd = open_outstream( filename, true, &rbuf ); + if( outfd < 0 ) + { + if( verbosity >= 0 && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, + typeflag ); + } + break; + case tf_link: + case tf_symlink: + { + const char * const linkname = extended.linkpath().c_str(); + const bool hard = typeflag == tf_link; + if( ( hard && link( linkname, filename ) != 0 ) || + ( !hard && symlink( linkname, filename ) != 0 ) ) + { + if( format_error( rbuf, errno, cantln_msg, hard ? "" : "sym", + linkname, filename ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); + } + } break; + case tf_directory: + { + struct stat st; + bool exists = ( stat( filename, &st ) == 0 ); + if( exists && !S_ISDIR( st.st_mode ) ) + { exists = false; std::remove( filename ); } + if( !exists && mkdir( filename, mode ) != 0 && errno != EEXIST ) + { + if( format_file_error( rbuf, filename, mkdir_msg, errno ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); + } + } break; + case tf_chardev: + case tf_blockdev: + { + const unsigned dev = + makedev( parse_octal( header + devmajor_o, devmajor_l ), + parse_octal( header + devminor_o, devminor_l ) ); + const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode; + if( mknod( filename, dmode, dev ) != 0 ) + { + if( format_file_error( rbuf, filename, mknod_msg, errno ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); + } + break; + } + case tf_fifo: + if( mkfifo( filename, mode ) != 0 ) + { + if( format_file_error( rbuf, filename, mkfifo_msg, errno ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); + } + break; + default: + if( format_error( rbuf, 0, uftype_msg, filename, typeflag ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 2 ); + return skip_member_lz( ar, courier, extended, member_id, worker_id, + typeflag ); + } + + const bool islink = ( typeflag == tf_link || typeflag == tf_symlink ); + errno = 0; + if( !islink && + ( !uid_gid_in_range( extended.get_uid(), extended.get_gid() ) || + chown( filename, extended.get_uid(), extended.get_gid() ) != 0 ) ) + { + if( outfd >= 0 ) mode &= ~( S_ISUID | S_ISGID | S_ISVTX ); + // chown will in many cases return with EPERM, which can be safely ignored. + if( errno != EPERM && errno != EINVAL ) + { + if( format_file_error( rbuf, filename, chown_msg, errno ) && + !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) ) + return Trival( other_msg, 0, 1 ); + set_error_status( 1 ); + } + } + + if( outfd >= 0 ) fchmod( outfd, mode ); // ignore errors + + if( data_may_follow( typeflag ) ) + { + const int bufsize = 32 * header_size; + uint8_t buf[bufsize]; + long long rest = extended.file_size(); + const int rem = rest % header_size; + const int padding = rem ? header_size - rem : 0; + while( rest > 0 ) + { + const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding; + const int ret = ar.read( buf, rsize ); + if( ret != 0 ) + { + if( outfd >= 0 ) + { + if( cl_opts.keep_damaged ) + { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) ); + close( outfd ); } + else { close( outfd ); std::remove( filename ); } + } + return Trival( ar.e_msg(), ar.e_code(), ret ); + } + const int wsize = ( rest >= bufsize ) ? bufsize : rest; + if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize ) + { format_file_error( rbuf, filename, werr_msg, errno ); + return Trival( rbuf(), 0, 1 ); } + rest -= wsize; + } + } + if( outfd >= 0 && close( outfd ) != 0 ) + { format_file_error( rbuf, filename, eclosf_msg, errno ); + return Trival( rbuf(), 0, 1 ); } + if( !islink ) + { + struct utimbuf t; + t.actime = extended.atime().sec(); + t.modtime = extended.mtime().sec(); + utime( filename, &t ); // ignore errors + } + if( ar.at_member_end() && + !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) + return Trival( other_msg, 0, 1 ); + return Trival(); + } + + +struct Worker_arg + { + const Cl_options * cl_opts; + const Archive_descriptor * ad; + Packet_courier * courier; + Name_monitor * name_monitor; + std::vector< char > * name_pending; + int worker_id; + int num_workers; + }; + + +/* Read lzip members from archive, decode their tar members, and give the + packets produced to courier. +*/ +extern "C" void * dworker( void * arg ) + { + const Worker_arg & tmp = *(const Worker_arg *)arg; + const Cl_options & cl_opts = *tmp.cl_opts; + const Archive_descriptor & ad = *tmp.ad; + Packet_courier & courier = *tmp.courier; + Name_monitor & name_monitor = *tmp.name_monitor; + std::vector< char > & name_pending = *tmp.name_pending; + const int worker_id = tmp.worker_id; + const int num_workers = tmp.num_workers; + + bool master = false; + Resizable_buffer rbuf; + Archive_reader_i ar( ad ); // 1 of N parallel readers + if( !rbuf.size() || ar.fatal() ) + { if( courier.request_mastership( worker_id, worker_id ) ) + courier.collect_packet( worker_id, worker_id, mem_msg, Packet::error1 ); + goto done; } + + for( long i = worker_id; !master && i < ad.lzip_index.members(); i += num_workers ) + { + if( ad.lzip_index.dblock( i ).size() <= 0 ) // empty lzip member + { + if( courier.collect_packet( i, worker_id, "", Packet::member_done ) ) + continue; else break; + } + + long long data_end = ad.lzip_index.dblock( i ).end(); + Extended extended; // metadata from extended records + bool prev_extended = false; // prev header was extended + ar.set_member( i ); // prepare for new member + while( true ) // process one tar header per iteration + { + if( ar.data_pos() >= data_end ) // dblock.end or udata_size + { + if( ar.data_pos() == data_end && !prev_extended ) break; + // member end exceeded or ends in extended, process rest of file + if( !courier.request_mastership( i, worker_id ) ) goto done; + master = true; + if( data_end >= ad.lzip_index.udata_size() ) + { courier.collect_packet( i, worker_id, end_msg, Packet::error2 ); + goto done; } + data_end = ad.lzip_index.udata_size(); + if( ar.data_pos() == data_end && !prev_extended ) break; + } + Tar_header header; + const int ret = ar.read( header, header_size ); + if( ret != 0 ) + { if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( i, worker_id, ar.e_msg(), + ( ret == 1 ) ? Packet::error1 : Packet::error2, ar.e_code() ); + goto done; } + if( !verify_ustar_chksum( header ) ) // error or EOA + { + if( !courier.request_mastership( i, worker_id ) ) goto done; + if( block_is_zero( header, header_size ) ) // EOA + { + if( !prev_extended || cl_opts.permissive ) courier.report_eoa(); + else courier.collect_packet( i, worker_id, fv_msg1, Packet::error2 ); + goto done; + } + courier.collect_packet( i, worker_id, ( ar.data_pos() > header_size ) ? + bad_hdr_msg : posix_lz_msg, Packet::error2 ); + goto done; + } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_global ) + { + const char * msg = 0; int ret = 2; + Extended dummy; // global headers are parsed and ignored + if( prev_extended && !cl_opts.permissive ) msg = fv_msg2; + else if( ( ret = ar.parse_records( dummy, header, rbuf, gblrec_msg, + true ) ) != 0 ) msg = ar.e_msg(); + else + { + if( ar.data_pos() == data_end && // end of lzip member or EOF + !courier.collect_packet( i, worker_id, "", Packet::member_done ) ) + goto done; + continue; + } + if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( i, worker_id, msg, ( ret == 1 ) ? + Packet::error1 : Packet::error2 ); + goto done; + } + if( typeflag == tf_extended ) + { + const char * msg = 0; int ret = 2; + if( prev_extended && !cl_opts.permissive ) msg = fv_msg3; + else if( ( ret = ar.parse_records( extended, header, rbuf, extrec_msg, + cl_opts.permissive ) ) != 0 ) msg = ar.e_msg(); + else if( !extended.crc_present() && cl_opts.missing_crc ) + { msg = miscrc_msg; ret = 2; } + else { prev_extended = true; continue; } + if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( i, worker_id, msg, ( ret == 1 ) ? + Packet::error1 : Packet::error2 ); + goto done; + } + prev_extended = false; + + extended.fill_from_ustar( header ); // copy metadata from header + + /* Skip members with an empty name in the ustar header. If there is an + extended header in a previous lzip member, its worker will request + mastership. Else the ustar-only unnamed member will be ignored. */ + Trival trival; + if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) + trival = skip_member_lz( ar, courier, extended, i, worker_id, typeflag ); + else + { + std::string rpmsg; + if( print_removed_prefix( extended.removed_prefix, &rpmsg ) && + !courier.collect_packet( i, worker_id, rpmsg.c_str(), Packet::prefix ) ) + { trival = Trival( other_msg, 0, 1 ); goto fatal; } + if( cl_opts.program_mode == m_list ) + trival = list_member_lz( ar, courier, extended, header, rbuf, i, worker_id ); + else if( extended.path().empty() ) + trival = skip_member_lz( ar, courier, extended, i, worker_id, typeflag ); + else if( cl_opts.program_mode == m_diff ) + trival = compare_member_lz( cl_opts, ar, courier, extended, header, + rbuf, i, worker_id ); + else trival = extract_member_lz( cl_opts, ar, courier, extended, header, + rbuf, i, worker_id, name_monitor ); + } + if( trival.retval ) // fatal error +fatal: { if( courier.request_mastership( i, worker_id ) ) + courier.collect_packet( i, worker_id, trival.msg, + ( trival.retval == 1 ) ? Packet::error1 : Packet::error2, + trival.errcode ); + goto done; } + extended.reset(); + } + } +done: + courier.worker_finished(); + return 0; + } + + +/* Get from courier the processed and sorted packets, and print + the member lines on stdout or the diagnostics and errors on stderr. +*/ +void muxer( const char * const archive_namep, Packet_courier & courier ) + { + int retval = 0; + while( retval == 0 ) + { + const Packet * const opacket = courier.deliver_packet(); + if( !opacket ) break; // queue is empty. all workers exited + + switch( opacket->status ) + { + case Packet::error1: + case Packet::error2: + show_file_error( archive_namep, opacket->line.c_str(), opacket->errcode ); + retval = ( opacket->status == Packet::error1 ) ? 1 : 2; break; + case Packet::prefix: show_error( opacket->line.c_str() ); break; + case Packet::diag: std::fputs( opacket->line.c_str(), stderr ); break; + default: if( opacket->line.size() ) + { std::fputs( opacket->line.c_str(), stdout ); std::fflush( stdout ); } + } + delete opacket; + } + if( retval == 0 && !courier.eoa_found() ) // no worker found EOA blocks + { show_file_error( archive_namep, end_msg ); retval = 2; } + if( retval ) exit_fail_mt( retval ); + } + +} // end namespace + + +// init the courier, then start the workers and call the muxer. +int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, + std::vector< char > & name_pending ) + { + const int out_slots = 65536; // max small files (<=512B) in 64 MiB + const int num_workers = // limited to number of members + std::min( (long)cl_opts.num_workers, ad.lzip_index.members() ); + if( cl_opts.program_mode == m_extract ) get_umask(); // cache the umask + Name_monitor + name_monitor( ( cl_opts.program_mode == m_extract ) ? num_workers : 0 ); + + /* If an error happens after any threads have been started, exit must be + called before courier goes out of scope. */ + Packet_courier courier( num_workers, out_slots ); + + Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers]; + pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers]; + if( !worker_args || !worker_threads ) { show_error( mem_msg ); return 1; } + for( int i = 0; i < num_workers; ++i ) + { + worker_args[i].cl_opts = &cl_opts; + worker_args[i].ad = &ad; + worker_args[i].courier = &courier; + worker_args[i].name_monitor = &name_monitor; + worker_args[i].name_pending = &name_pending; + worker_args[i].worker_id = i; + worker_args[i].num_workers = num_workers; + const int errcode = + pthread_create( &worker_threads[i], 0, dworker, &worker_args[i] ); + if( errcode ) + { show_error( "Can't create worker threads", errcode ); exit_fail_mt(); } + } + + muxer( ad.namep, courier ); + + for( int i = num_workers - 1; i >= 0; --i ) + { + const int errcode = pthread_join( worker_threads[i], 0 ); + if( errcode ) + { show_error( "Can't join worker threads", errcode ); exit_fail_mt(); } + } + delete[] worker_threads; + delete[] worker_args; + + int retval = 0; + if( close( ad.infd ) != 0 ) + { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; } + + if( retval == 0 ) + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] ) + { show_file_error( cl_opts.parser.argument( i ).c_str(), nfound_msg ); + retval = 1; } + + if( cl_opts.debug_level & 1 ) + std::fprintf( stderr, + "muxer tried to consume from workers %8u times\n" + "muxer had to wait %8u times\n", + courier.ocheck_counter, + courier.owait_counter ); + + if( !courier.finished() ) internal_error( "courier not finished." ); + return final_exit_status( retval, cl_opts.program_mode != m_diff ); + } diff --git a/delete.cc b/delete.cc new file mode 100644 index 0000000..6fc58f5 --- /dev/null +++ b/delete.cc @@ -0,0 +1,190 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include // for lzlib.h +#include +#include + +#include "tarlz.h" +#include "arg_parser.h" +#include "lzip_index.h" +#include "archive_reader.h" + + +bool safe_seek( const int fd, const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) return true; + show_error( seek_msg, errno ); return false; + } + + +int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, + std::vector< char > & name_pending, const long long istream_pos, + const int outfd, int retval ) + { + const long long rest = ad.lzip_index.file_size() - istream_pos; + if( istream_pos > 0 && rest > 0 && + ( !safe_seek( ad.infd, istream_pos ) || + !copy_file( ad.infd, outfd, rest ) ) ) + { show_file_error( ad.namep, "Error during tail copy." ); + return retval ? retval : 1; } + const long long ostream_pos = lseek( outfd, 0, SEEK_CUR ); + if( ostream_pos < 0 ) + { show_file_error( ad.namep, seek_msg, errno ); retval = 1; } + else if( ostream_pos > 0 && ostream_pos < ad.lzip_index.file_size() ) + { + int ret; + do ret = ftruncate( outfd, ostream_pos ); + while( ret != 0 && errno == EINTR ); + if( ret != 0 || lseek( outfd, 0, SEEK_END ) != ostream_pos ) + { + show_file_error( ad.namep, "Can't truncate archive", errno ); + if( retval < 1 ) retval = 1; + } + } + + if( ( close( outfd ) | close( ad.infd ) ) != 0 && retval == 0 ) + { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; } + + if( retval == 0 ) + for( int i = 0; i < parser.arguments(); ++i ) + if( nonempty_arg( parser, i ) && name_pending[i] ) + { show_file_error( parser.argument( i ).c_str(), nfound_msg ); + retval = 1; } + return retval; + } + + +/* Deleting from a corrupt archive must not worsen the corruption. Stop and + tail-copy as soon as corruption is found. +*/ +int delete_members( const Cl_options & cl_opts ) + { + if( cl_opts.num_files <= 0 ) + { if( verbosity >= 1 ) show_error( "Nothing to delete." ); return 0; } + if( cl_opts.archive_name.empty() ) + { show_error( "Deleting from stdin not implemented yet." ); return 1; } + const Archive_descriptor ad( cl_opts.archive_name ); + if( ad.infd < 0 ) return 1; + const int outfd = open_outstream( cl_opts.archive_name, false ); + if( outfd < 0 ) { close( ad.infd ); return 1; } + + // mark member names to be deleted + std::vector< char > name_pending( cl_opts.parser.arguments(), false ); + for( int i = 0; i < cl_opts.parser.arguments(); ++i ) + if( nonempty_arg( cl_opts.parser, i ) && + !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) ) + name_pending[i] = true; + + if( ad.indexed ) // archive is a compressed regular file + return delete_members_lz( cl_opts, ad, name_pending, outfd ); + if( !ad.seekable ) + { show_file_error( ad.namep, "Archive is not seekable." ); return 1; } + if( ad.lzip_index.file_size() < 3 * header_size ) + { show_file_error( ad.namep, has_lz_ext( ad.name ) ? posix_lz_msg : posix_msg ); + return 2; } + // archive is uncompressed seekable, unless compressed corrupt + + Archive_reader ar( ad ); // serial reader + Resizable_buffer rbuf; + long long istream_pos = 0; // source of next data move + long long member_begin = 0; // first pos of current tar member + Extended extended; // metadata from extended records + int retval = 0; + bool prev_extended = false; // prev header was extended + if( !rbuf.size() ) { show_error( mem_msg ); return 1; } + + while( true ) // process one tar header per iteration + { + if( !prev_extended && ( member_begin = lseek( ad.infd, 0, SEEK_CUR ) ) < 0 ) + { show_file_error( ad.namep, seek_msg, errno ); retval = 1; break; } + Tar_header header; + if( ( retval = ar.read( header, header_size ) ) != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; } + if( !verify_ustar_chksum( header ) ) // error or EOA + { + if( block_is_zero( header, header_size ) ) // EOA + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg1 ); retval = 2; } + break; + } + // posix format already verified by archive reader + show_file_error( ad.namep, bad_hdr_msg ); + retval = 2; break; + } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_global ) + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg2 ); retval = 2; break; } + Extended dummy; // global headers are parsed and ignored + retval = ar.parse_records( dummy, header, rbuf, gblrec_msg, true ); + if( retval ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; } + continue; + } + if( typeflag == tf_extended ) + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg3 ); retval = 2; break; } + if( ( retval = ar.parse_records( extended, header, rbuf, extrec_msg, + cl_opts.permissive ) ) != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; } + if( !extended.crc_present() && cl_opts.missing_crc ) + { show_file_error( ad.namep, miscrc_msg ); retval = 2; break; } + prev_extended = true; continue; + } + prev_extended = false; + + extended.fill_from_ustar( header ); // copy metadata from header + + if( ( retval = ar.skip_member( extended ) ) != 0 ) + { show_file_error( ad.namep, seek_msg, errno ); break; } + + // delete tar member + if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) + { + print_removed_prefix( extended.removed_prefix ); + if( !show_member_name( extended, header, 1, rbuf ) ) + { retval = 1; break; } + const long long pos = lseek( ad.infd, 0, SEEK_CUR ); + if( pos <= 0 || pos <= member_begin || member_begin < istream_pos ) + { show_file_error( ad.namep, seek_msg, errno ); retval = 1; break; } + const long long size = member_begin - istream_pos; + if( size > 0 ) // move pending data each time a member is deleted + { + if( istream_pos == 0 ) + { if( !safe_seek( outfd, size ) ) { retval = 1; break; } } + else if( !safe_seek( ad.infd, istream_pos ) || + !copy_file( ad.infd, outfd, size ) || + !safe_seek( ad.infd, pos ) ) { retval = 1; break; } + } + istream_pos = pos; + } + extended.reset(); + } + + return tail_copy( cl_opts.parser, ad, name_pending, istream_pos, outfd, retval ); + } diff --git a/delete_lz.cc b/delete_lz.cc new file mode 100644 index 0000000..2e536e3 --- /dev/null +++ b/delete_lz.cc @@ -0,0 +1,139 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include // for lzlib.h +#include +#include + +#include "tarlz.h" +#include "arg_parser.h" +#include "lzip_index.h" +#include "archive_reader.h" + + +/* Deleting from a corrupt archive must not worsen the corruption. Stop and + tail-copy as soon as corruption is found. +*/ +int delete_members_lz( const Cl_options & cl_opts, + const Archive_descriptor & ad, + std::vector< char > & name_pending, + const int outfd ) + { + Archive_reader_i ar( ad ); // indexed reader + Resizable_buffer rbuf; + if( !rbuf.size() || ar.fatal() ) { show_error( mem_msg ); return 1; } + + long long istream_pos = 0; // source of next data move + int retval = 0, retval2 = 0; + for( long i = 0; i < ad.lzip_index.members(); ++i ) + { + if( ad.lzip_index.dblock( i ).size() == 0 ) continue; // empty lzip member + long long member_begin = 0; // first pos of current tar member + Extended extended; // metadata from extended records + bool prev_extended = false; // prev header was extended + ar.set_member( i ); // prepare for new member + while( true ) // process one tar header per iteration + { + if( ar.data_pos() >= ar.mdata_end() ) + { + if( ar.at_member_end() && !prev_extended ) break; + // member end exceeded or ends in extended + show_file_error( ad.namep, "Member misalignment found." ); + retval = 2; goto done; + } + if( !prev_extended ) member_begin = ar.data_pos(); + Tar_header header; + if( ( retval = ar.read( header, header_size ) ) != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; } + if( !verify_ustar_chksum( header ) ) // error or EOA + { + if( block_is_zero( header, header_size ) ) // EOA + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg1 ); retval = 2; } + goto done; + } + // indexed archive reader does not verify posix format + show_file_error( ad.namep, ( ar.data_pos() > header_size ) ? + bad_hdr_msg : posix_lz_msg ); + retval = 2; + goto done; + } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_global ) + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg2 ); retval = 2; goto done; } + Extended dummy; // global headers are parsed and ignored + retval = ar.parse_records( dummy, header, rbuf, gblrec_msg, true ); + if( retval ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; } + continue; + } + if( typeflag == tf_extended ) + { + if( prev_extended && !cl_opts.permissive ) + { show_file_error( ad.namep, fv_msg3 ); retval = 2; goto done; } + if( ( retval = ar.parse_records( extended, header, rbuf, extrec_msg, + cl_opts.permissive ) ) != 0 ) + { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; } + if( !extended.crc_present() && cl_opts.missing_crc ) + { show_file_error( ad.namep, miscrc_msg ); retval = 2; goto done; } + prev_extended = true; continue; + } + prev_extended = false; + + extended.fill_from_ustar( header ); // copy metadata from header + + if( ( retval = ar.skip_member( extended ) ) != 0 ) goto done; + + // delete tar member + if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) ) + { + print_removed_prefix( extended.removed_prefix ); + // verify that members match + if( member_begin != ad.lzip_index.dblock( i ).pos() || !ar.at_member_end() ) + { show_file_error( extended.path().c_str(), + "Can't delete: not compressed individually." ); + retval2 = 2; extended.reset(); continue; } + if( !show_member_name( extended, header, 1, rbuf ) ) + { retval = 1; goto done; } + const long long size = ad.lzip_index.mblock( i ).pos() - istream_pos; + if( size > 0 ) // move pending data each time a member is deleted + { + if( istream_pos == 0 ) + { if( !safe_seek( outfd, size ) ) { retval = 1; goto done; } } + else if( !safe_seek( ad.infd, istream_pos ) || + !copy_file( ad.infd, outfd, size ) ) { retval = 1; goto done; } + } + istream_pos = ad.lzip_index.mblock( i ).end(); // member end + } + extended.reset(); + } + } +done: + if( retval < retval2 ) retval = retval2; + // tail copy keeps trailing data + return tail_copy( cl_opts.parser, ad, name_pending, istream_pos, outfd, retval ); + } diff --git a/doc/tarlz.1 b/doc/tarlz.1 new file mode 100644 index 0000000..d23b164 --- /dev/null +++ b/doc/tarlz.1 @@ -0,0 +1,177 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16. +.TH TARLZ "1" "September 2022" "tarlz 0.23" "User Commands" +.SH NAME +tarlz \- creates tar archives with multimember lzip compression +.SH SYNOPSIS +.B tarlz +\fI\,operation \/\fR[\fI\,options\/\fR] [\fI\,files\/\fR] +.SH DESCRIPTION +Tarlz is a massively parallel (multi\-threaded) combined implementation of +the tar archiver and the lzip compressor. Tarlz uses the compression library +lzlib. +.PP +Tarlz creates, lists, and extracts archives in a simplified and safer +variant of the POSIX pax format compressed in lzip format, keeping the +alignment between tar members and lzip members. The resulting multimember +tar.lz archive is fully backward compatible with standard tar tools like GNU +tar, which treat it like any other tar.lz archive. Tarlz can append files to +the end of such compressed archives. +.PP +Keeping the alignment between tar members and lzip members has two +advantages. It adds an indexed lzip layer on top of the tar archive, making +it possible to decode the archive safely in parallel. It also minimizes the +amount of data lost in case of corruption. +.PP +The tarlz file format is a safe POSIX\-style backup format. In case of +corruption, tarlz can extract all the undamaged members from the tar.lz +archive, skipping over the damaged members, just like the standard +(uncompressed) tar. Moreover, the option '\-\-keep\-damaged' can be used to +recover as much data as possible from each damaged member, and lziprecover +can be used to recover some of the damaged members. +.SS "Operations:" +.TP +\fB\-\-help\fR +display this help and exit +.TP +\fB\-V\fR, \fB\-\-version\fR +output version information and exit +.TP +\fB\-A\fR, \fB\-\-concatenate\fR +append archives to the end of an archive +.TP +\fB\-c\fR, \fB\-\-create\fR +create a new archive +.TP +\fB\-d\fR, \fB\-\-diff\fR +find differences between archive and file system +.TP +\fB\-\-delete\fR +delete files/directories from an archive +.TP +\fB\-r\fR, \fB\-\-append\fR +append files to the end of an archive +.TP +\fB\-t\fR, \fB\-\-list\fR +list the contents of an archive +.TP +\fB\-x\fR, \fB\-\-extract\fR +extract files/directories from an archive +.TP +\fB\-z\fR, \fB\-\-compress\fR +compress existing POSIX tar archives +.TP +\fB\-\-check\-lib\fR +check version of lzlib and exit +.SH OPTIONS +.TP +\fB\-B\fR, \fB\-\-data\-size=\fR +set target size of input data blocks [2x8=16 MiB] +.TP +\fB\-C\fR, \fB\-\-directory=\fR +change to directory +.TP +\fB\-f\fR, \fB\-\-file=\fR +use archive file +.TP +\fB\-h\fR, \fB\-\-dereference\fR +follow symlinks; archive the files they point to +.TP +\fB\-n\fR, \fB\-\-threads=\fR +set number of (de)compression threads [2] +.TP +\fB\-o\fR, \fB\-\-output=\fR +compress to +.TP +\fB\-p\fR, \fB\-\-preserve\-permissions\fR +don't subtract the umask on extraction +.TP +\fB\-q\fR, \fB\-\-quiet\fR +suppress all messages +.TP +\fB\-v\fR, \fB\-\-verbose\fR +verbosely list files processed +.TP +\fB\-0\fR .. \fB\-9\fR +set compression level [default 6] +.TP +\fB\-\-uncompressed\fR +don't compress the archive created +.TP +\fB\-\-asolid\fR +create solidly compressed appendable archive +.TP +\fB\-\-bsolid\fR +create per block compressed archive (default) +.TP +\fB\-\-dsolid\fR +create per directory compressed archive +.TP +\fB\-\-no\-solid\fR +create per file compressed archive +.TP +\fB\-\-solid\fR +create solidly compressed archive +.TP +\fB\-\-anonymous\fR +equivalent to '\-\-owner=root \fB\-\-group\fR=\fI\,root\/\fR' +.TP +\fB\-\-owner=\fR +use name/ID for files added to archive +.TP +\fB\-\-group=\fR +use name/ID for files added to archive +.TP +\fB\-\-exclude=\fR +exclude files matching a shell pattern +.TP +\fB\-\-ignore\-ids\fR +ignore differences in owner and group IDs +.TP +\fB\-\-ignore\-overflow\fR +ignore mtime overflow differences on 32\-bit +.TP +\fB\-\-keep\-damaged\fR +don't delete partially extracted files +.TP +\fB\-\-missing\-crc\fR +exit with error status if missing extended CRC +.TP +\fB\-\-mtime=\fR +use as mtime for files added to archive +.TP +\fB\-\-out\-slots=\fR +number of 1 MiB output packets buffered [64] +.TP +\fB\-\-warn\-newer\fR +warn if any file is newer than the archive +.PP +If no archive is specified, tarlz tries to read it from standard input or +write it to standard output. +.PP +Exit status: 0 for a normal exit, 1 for environmental problems +(file not found, files differ, invalid command line options, I/O errors, +etc), 2 to indicate a corrupt or invalid input file, 3 for an internal +consistency error (e.g., bug) which caused tarlz to panic. +.SH "REPORTING BUGS" +Report bugs to lzip\-bug@nongnu.org +.br +Tarlz home page: http://www.nongnu.org/lzip/tarlz.html +.SH COPYRIGHT +Copyright \(co 2022 Antonio Diaz Diaz. +Using lzlib 1.13 +License GPLv2+: GNU GPL version 2 or later +.br +This is free software: you are free to change and redistribute it. +There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +The full documentation for +.B tarlz +is maintained as a Texinfo manual. If the +.B info +and +.B tarlz +programs are properly installed at your site, the command +.IP +.B info tarlz +.PP +should give you access to the complete manual. diff --git a/doc/tarlz.info b/doc/tarlz.info new file mode 100644 index 0000000..d71c0a3 --- /dev/null +++ b/doc/tarlz.info @@ -0,0 +1,1272 @@ +This is tarlz.info, produced by makeinfo version 4.13+ from tarlz.texi. + +INFO-DIR-SECTION Archiving +START-INFO-DIR-ENTRY +* Tarlz: (tarlz). Archiver with multimember lzip compression +END-INFO-DIR-ENTRY + + +File: tarlz.info, Node: Top, Next: Introduction, Up: (dir) + +Tarlz Manual +************ + +This manual is for Tarlz (version 0.23, 23 September 2022). + +* Menu: + +* Introduction:: Purpose and features of tarlz +* Invoking tarlz:: Command line interface +* Portable character set:: POSIX portable filename character set +* File format:: Detailed format of the compressed archive +* Amendments to pax format:: The reasons for the differences with pax +* Program design:: Internal structure of tarlz +* Multi-threaded decoding:: Limitations of parallel tar decoding +* Minimum archive sizes:: Sizes required for full multi-threaded speed +* Examples:: A small tutorial with examples +* Problems:: Reporting bugs +* Concept index:: Index of concepts + + + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This manual is free documentation: you have unlimited permission to copy, +distribute, and modify it. + + +File: tarlz.info, Node: Introduction, Next: Invoking tarlz, Prev: Top, Up: Top + +1 Introduction +************** + +Tarlz is a massively parallel (multi-threaded) combined implementation of +the tar archiver and the lzip compressor. Tarlz uses the compression +library lzlib. + + Tarlz creates tar archives using a simplified and safer variant of the +POSIX pax format compressed in lzip format, keeping the alignment between +tar members and lzip members. The resulting multimember tar.lz archive is +fully backward compatible with standard tar tools like GNU tar, which treat +it like any other tar.lz archive. Tarlz can append files to the end of such +compressed archives. + + Keeping the alignment between tar members and lzip members has two +advantages. It adds an indexed lzip layer on top of the tar archive, making +it possible to decode the archive safely in parallel. It also minimizes the +amount of data lost in case of corruption. Compressing a tar archive with +plzip may even double the amount of files lost for each lzip member damaged +because it does not keep the members aligned. + + Tarlz can create tar archives with five levels of compression +granularity: per file (--no-solid), per block (--bsolid, default), per +directory (--dsolid), appendable solid (--asolid), and solid (--solid). It +can also create uncompressed tar archives. + +Of course, compressing each file (or each directory) individually can't +achieve a compression ratio as high as compressing solidly the whole tar +archive, but it has the following advantages: + + * The resulting multimember tar.lz archive can be decompressed in + parallel, multiplying the decompression speed. + + * New members can be appended to the archive (by removing the + end-of-archive member), and unwanted members can be deleted from the + archive. Just like an uncompressed tar archive. + + * It is a safe POSIX-style backup format. In case of corruption, tarlz + can extract all the undamaged members from the tar.lz archive, + skipping over the damaged members, just like the standard + (uncompressed) tar. Moreover, the option '--keep-damaged' can be used + to recover as much data as possible from each damaged member, and + lziprecover can be used to recover some of the damaged members. + + * A multimember tar.lz archive is usually smaller than the corresponding + solidly compressed tar.gz archive, except when individually + compressing files smaller than about 32 KiB. + + Tarlz protects the extended records with a Cyclic Redundancy Check (CRC) +in a way compatible with standard tar tools. *Note crc32::. + + Tarlz does not understand other tar formats like 'gnu', 'oldgnu', 'star' +or 'v7'. The command 'tarlz -tf archive.tar.lz > /dev/null' can be used to +verify that the format of the archive is compatible with tarlz. + + +File: tarlz.info, Node: Invoking tarlz, Next: Portable character set, Prev: Introduction, Up: Top + +2 Invoking tarlz +**************** + +The format for running tarlz is: + + tarlz OPERATION [OPTIONS] [FILES] + +All operations except '--concatenate' and '--compress' operate on whole +trees if any FILE is a directory. All operations except '--compress' +overwrite output files without warning. If no archive is specified, tarlz +tries to read it from standard input or write it to standard output. Tarlz +refuses to read archive data from a terminal or write archive data to a +terminal. Tarlz detects when the archive being created or enlarged is among +the files to be archived, appended, or concatenated, and skips it. + + Tarlz does not use absolute file names nor file names above the current +working directory (perhaps changed by option '-C'). On archive creation or +appending tarlz archives the files specified, but removes from member names +any leading and trailing slashes and any file name prefixes containing a +'..' component. On extraction, leading and trailing slashes are also +removed from member names, and archive members containing a '..' component +in the file name are skipped. Tarlz does not follow symbolic links during +extraction; not even symbolic links replacing intermediate directories. + + On extraction and listing, tarlz removes leading './' strings from +member names in the archive or given in the command line, so that +'tarlz -xf foo ./bar baz' extracts members 'bar' and './baz' from archive +'foo'. + + If several compression levels or '--*solid' options are given, the last +setting is used. For example '-9 --solid --uncompressed -1' is equivalent +to '-1 --solid'. + + tarlz supports the following operations: + +'--help' + Print an informative help message describing the options and exit. + +'-V' +'--version' + Print the version number of tarlz on the standard output and exit. + This version number should be included in all bug reports. + +'-A' +'--concatenate' + Append one or more archives to the end of an archive. If no archive is + specified with the option '-f', the input archives are concatenated to + standard output. All the archives involved must be regular (seekable) + files, and must be either all compressed or all uncompressed. + Compressed and uncompressed archives can't be mixed. Compressed + archives must be multimember lzip files with the two end-of-archive + blocks plus any zero padding contained in the last lzip member of each + archive. The intermediate end-of-archive blocks are removed as each + new archive is concatenated. If the archive is uncompressed, tarlz + parses tar headers until it finds the end-of-archive blocks. Exit with + status 0 without modifying the archive if no FILES have been specified. + + Concatenating archives containing files in common results in two or + more tar members with the same name in the resulting archive, which + may produce nondeterministic behavior during multi-threaded extraction. + *Note mt-extraction::. + +'-c' +'--create' + Create a new archive from FILES. + +'-d' +'--diff' + Compare and report differences between archive and file system. For + each tar member in the archive, verify that the corresponding file in + the file system exists and is of the same type (regular file, + directory, etc). Report on standard output the differences found in + type, mode (permissions), owner and group IDs, modification time, file + size, file contents (of regular files), target (of symlinks) and + device number (of block/character special files). + + As tarlz removes leading slashes from member names, the option '-C' may + be used in combination with '--diff' when absolute file names were used + on archive creation: 'tarlz -C / -d'. Alternatively, tarlz may be run + from the root directory to perform the comparison. + +'--delete' + Delete files and directories from an archive in place. It currently can + delete only from uncompressed archives and from archives with files + compressed individually ('--no-solid' archives). Note that files of + about '--data-size' or larger are compressed individually even if + '--bsolid' is used, and can therefore be deleted. Tarlz takes care to + not delete a tar member unless it is possible to do so. For example it + won't try to delete a tar member that is not compressed individually. + Even in the case of finding a corrupt member after having deleted some + member(s), tarlz stops and copies the rest of the file as soon as + corruption is found, leaving it just as corrupt as it was, but not + worse. + + To delete a directory without deleting the files under it, use + 'tarlz --delete -f foo --exclude='dir/*' dir'. Deleting in place may + be dangerous. A corrupt archive, a power cut, or an I/O error may cause + data loss. + +'-r' +'--append' + Append files to the end of an archive. The archive must be a regular + (seekable) file either compressed or uncompressed. Compressed members + can't be appended to an uncompressed archive, nor vice versa. If the + archive is compressed, it must be a multimember lzip file with the two + end-of-archive blocks plus any zero padding contained in the last lzip + member of the archive. It is possible to append files to an archive + with a different compression granularity. Appending works as follows; + first the end-of-archive blocks are removed, then the new members are + appended, and finally two new end-of-archive blocks are appended to + the archive. If the archive is uncompressed, tarlz parses and skips + tar headers until it finds the end-of-archive blocks. Exit with status + 0 without modifying the archive if no FILES have been specified. + + Appending files already present in the archive results in two or more + tar members with the same name, which may produce nondeterministic + behavior during multi-threaded extraction. *Note mt-extraction::. + +'-t' +'--list' + List the contents of an archive. If FILES are given, list only the + FILES given. + +'-x' +'--extract' + Extract files from an archive. If FILES are given, extract only the + FILES given. Else extract all the files in the archive. To extract a + directory without extracting the files under it, use + 'tarlz -xf foo --exclude='dir/*' dir'. Tarlz removes files and empty + directories unconditionally before extracting over them. Other than + that, it will not make any special effort to extract a file over an + incompatible type of file. For example, extracting a file over a + non-empty directory will usually fail. + +'-z' +'--compress' + Compress existing POSIX tar archives aligning the lzip members to the + tar members with choice of granularity (--bsolid by default, --dsolid + works like --asolid). The input archives are kept unchanged. Existing + compressed archives are not overwritten. A hyphen '-' used as the name + of an input archive reads from standard input and writes to standard + output (unless the option '--output' is used). Tarlz can be used as + compressor for GNU tar using a command like + 'tar -c -Hustar foo | tarlz -z -o foo.tar.lz'. Note that tarlz only + works reliably on archives without global headers, or with global + headers whose content can be ignored. + + The compression is reversible, including any garbage present after the + end-of-archive blocks. Tarlz stops parsing after the first + end-of-archive block is found, and then compresses the rest of the + archive. Unless solid compression is requested, the end-of-archive + blocks are compressed in a lzip member separated from the preceding + members and from any non-zero garbage following the end-of-archive + blocks. '--compress' implies plzip argument style, not tar style. Each + input archive is compressed to a file with the extension '.lz' added + unless the option '--output' is used. When '--output' is used, only + one input archive can be specified. '-f' can't be used with + '--compress'. + +'--check-lib' + Compare the version of lzlib used to compile tarlz with the version + actually being used at run time and exit. Report any differences + found. Exit with error status 1 if differences are found. A mismatch + may indicate that lzlib is not correctly installed or that a different + version of lzlib has been installed after compiling tarlz. Exit with + error status 2 if LZ_API_VERSION and LZ_version_string don't match. + 'tarlz -v --check-lib' shows the version of lzlib being used and the + value of LZ_API_VERSION (if defined). *Note Library version: + (lzlib)Library version. + + + tarlz supports the following options: *Note Argument syntax: +(arg_parser)Argument syntax. + +'-B BYTES' +'--data-size=BYTES' + Set target size of input data blocks for the option '--bsolid'. *Note + --bsolid::. Valid values range from 8 KiB to 1 GiB. Default value is + two times the dictionary size, except for option '-0' where it + defaults to 1 MiB. *Note Minimum archive sizes::. + +'-C DIR' +'--directory=DIR' + Change to directory DIR. When creating or appending, the position of + each '-C' option in the command line is significant; it will change the + current working directory for the following FILES until a new '-C' + option appears in the command line. When extracting or comparing, all + the '-C' options are executed in sequence before reading the archive. + Listing ignores any '-C' options specified. DIR is relative to the + then current working directory, perhaps changed by a previous '-C' + option. + + Note that a process can only have one current working directory (CWD). + Therefore multi-threading can't be used to create an archive if a '-C' + option appears after a relative file name in the command line. + +'-f ARCHIVE' +'--file=ARCHIVE' + Use archive file ARCHIVE. A hyphen '-' used as an ARCHIVE argument + reads from standard input or writes to standard output. + +'-h' +'--dereference' + Follow symbolic links during archive creation, appending or comparison. + Archive or compare the files they point to instead of the links + themselves. + +'-n N' +'--threads=N' + Set the number of (de)compression threads, overriding the system's + default. Valid values range from 0 to "as many as your system can + support". A value of 0 disables threads entirely. If this option is + not used, tarlz tries to detect the number of processors in the system + and use it as default value. 'tarlz --help' shows the system's default + value. See the note about multi-threaded archive creation in the + option '-C' above. + + Note that the number of usable threads is limited during compression to + ceil( uncompressed_size / data_size ) (*note Minimum archive sizes::), + and during decompression to the number of lzip members in the tar.lz + archive, which you can find by running 'lzip -lv archive.tar.lz'. + +'-o FILE' +'--output=FILE' + Write the compressed output to FILE. '-o -' writes the compressed + output to standard output. Currently '--output' only works with + '--compress'. + +'-p' +'--preserve-permissions' + On extraction, set file permissions as they appear in the archive. + This is the default behavior when tarlz is run by the superuser. The + default for other users is to subtract the umask of the user running + tarlz from the permissions specified in the archive. + +'-q' +'--quiet' + Quiet operation. Suppress all messages. + +'-v' +'--verbose' + Verbosely list files processed. Further -v's (up to 4) increase the + verbosity level. + +'-0 .. -9' + Set the compression level for '--create', '--append', and + '--compress'. The default compression level is '-6'. Like lzip, tarlz + also minimizes the dictionary size of the lzip members it creates, + reducing the amount of memory required for decompression. + + Level Dictionary size Match length limit + -0 64 KiB 16 bytes + -1 1 MiB 5 bytes + -2 1.5 MiB 6 bytes + -3 2 MiB 8 bytes + -4 3 MiB 12 bytes + -5 4 MiB 20 bytes + -6 8 MiB 36 bytes + -7 16 MiB 68 bytes + -8 24 MiB 132 bytes + -9 32 MiB 273 bytes + +'--uncompressed' + With '--create', don't compress the tar archive created. Create an + uncompressed tar archive instead. With '--append', don't compress the + new members appended to the tar archive. Compressed members can't be + appended to an uncompressed archive, nor vice versa. + +'--asolid' + When creating or appending to a compressed archive, use appendable + solid compression. All the files being added to the archive are + compressed into a single lzip member, but the end-of-archive blocks + are compressed into a separate lzip member. This creates a solidly + compressed appendable archive. Solid archives can't be created nor + decoded in parallel. + +'--bsolid' + When creating or appending to a compressed archive, use block + compression. Tar members are compressed together in a lzip member + until they approximate a target uncompressed size. The size can't be + exact because each solidly compressed data block must contain an + integer number of tar members. Block compression is the default + because it improves compression ratio for archives with many files + smaller than the block size. This option allows tarlz revert to + default behavior if, for example, it is invoked through an alias like + 'tar='tarlz --solid''. *Note --data-size::, to set the target block + size. + +'--dsolid' + When creating or appending to a compressed archive, compress each file + specified in the command line separately in its own lzip member, and + use solid compression for each directory specified in the command + line. The end-of-archive blocks are compressed into a separate lzip + member. This creates a compressed appendable archive with a separate + lzip member for each file or top-level directory specified. + +'--no-solid' + When creating or appending to a compressed archive, compress each file + separately in its own lzip member. The end-of-archive blocks are + compressed into a separate lzip member. This creates a compressed + appendable archive with a lzip member for each file. + +'--solid' + When creating or appending to a compressed archive, use solid + compression. The files being added to the archive, along with the + end-of-archive blocks, are compressed into a single lzip member. The + resulting archive is not appendable. No more files can be later + appended to the archive. Solid archives can't be created nor decoded + in parallel. + +'--anonymous' + Equivalent to '--owner=root --group=root'. + +'--owner=OWNER' + When creating or appending, use OWNER for files added to the archive. + If OWNER is not a valid user name, it is decoded as a decimal numeric + user ID. + +'--group=GROUP' + When creating or appending, use GROUP for files added to the archive. + If GROUP is not a valid group name, it is decoded as a decimal numeric + group ID. + +'--exclude=PATTERN' + Exclude files matching a shell pattern like '*.o'. A file is considered + to match if any component of the file name matches. For example, '*.o' + matches 'foo.o', 'foo.o/bar' and 'foo/bar.o'. If PATTERN contains a + '/', it matches a corresponding '/' in the file name. For example, + 'foo/*.o' matches 'foo/bar.o'. Multiple '--exclude' options can be + specified. + +'--ignore-ids' + Make '--diff' ignore differences in owner and group IDs. This option is + useful when comparing an '--anonymous' archive. + +'--ignore-overflow' + Make '--diff' ignore differences in mtime caused by overflow on 32-bit + systems with a 32-bit time_t. + +'--keep-damaged' + Don't delete partially extracted files. If a decompression error + happens while extracting a file, keep the partial data extracted. Use + this option to recover as much data as possible from each damaged + member. It is recommended to run tarlz in single-threaded mode + (--threads=0) when using this option. + +'--missing-crc' + Exit with error status 2 if the CRC of the extended records is + missing. When this option is used, tarlz detects any corruption in the + extended records (only limited by CRC collisions). But note that a + corrupt 'GNU.crc32' keyword, for example 'GNU.crc33', is reported as a + missing CRC instead of as a corrupt record. This misleading + 'Missing CRC' message is the consequence of a flaw in the POSIX pax + format; i.e., the lack of a mandatory check sequence of the extended + records. *Note crc32::. + +'--mtime=DATE' + When creating or appending, use DATE as the modification time for + files added to the archive instead of their actual modification times. + The value of DATE may be either '@' followed by the number of seconds + since (or before) the epoch, or a date in format + '[-]YYYY-MM-DD HH:MM:SS' or '[-]YYYY-MM-DDTHH:MM:SS', or the name of + an existing reference file starting with '.' or '/' whose modification + time is used. The time of day 'HH:MM:SS' in the date format is + optional and defaults to '00:00:00'. The epoch is + '1970-01-01 00:00:00 UTC'. Negative seconds or years define a + modification time before the epoch. + +'--out-slots=N' + Number of 1 MiB output packets buffered per worker thread during + multi-threaded creation or appending to compressed archives. + Increasing the number of packets may increase compression speed if the + files being archived are larger than 64 MiB compressed, but requires + more memory. Valid values range from 1 to 1024. The default value is + 64. + +'--warn-newer' + During archive creation, warn if any file being archived has a + modification time newer than the archive creation time. This option + may slow archive creation somewhat because it makes an extra call to + 'stat' after archiving each file, but it guarantees that file contents + were not modified during the creation of the archive. Note that the + file must be at least one second newer than the archive for it to be + detected as newer. + + + Exit status: 0 for a normal exit, 1 for environmental problems (file not +found, files differ, invalid command line options, I/O errors, etc), 2 to +indicate a corrupt or invalid input file, 3 for an internal consistency +error (e.g., bug) which caused tarlz to panic. + + +File: tarlz.info, Node: Portable character set, Next: File format, Prev: Invoking tarlz, Up: Top + +3 POSIX portable filename character set +*************************************** + +The set of characters from which portable file names are constructed. + + A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + a b c d e f g h i j k l m n o p q r s t u v w x y z + 0 1 2 3 4 5 6 7 8 9 . _ - + + The last three characters are the period, underscore, and hyphen-minus +characters, respectively. + + File names are identifiers. Therefore, archiving works better when file +names use only the portable character set without spaces added. + + +File: tarlz.info, Node: File format, Next: Amendments to pax format, Prev: Portable character set, Up: Top + +4 File format +************* + +In the diagram below, a box like this: + ++---+ +| | <-- the vertical bars might be missing ++---+ + + represents one byte; a box like this: + ++==============+ +| | ++==============+ + + represents a variable number of bytes or a fixed but large number of +bytes (for example 512). + + + A tar.lz file consists of a series of lzip members (compressed data +sets). The members simply appear one after another in the file, with no +additional information before, between, or after them. + + Each lzip member contains one or more tar members in a simplified POSIX +pax interchange format. The only pax typeflag value supported by tarlz (in +addition to the typeflag values defined by the ustar format) is 'x'. The +pax format is an extension on top of the ustar format that removes the size +limitations of the ustar format. + + Each tar member contains one file archived, and is represented by the +following sequence: + + * An optional extended header block followed by one or more blocks that + contain the extended header records as if they were the contents of a + file; i.e., the extended header records are included as the data for + this header block. This header block is of the form described in pax + header block, with a typeflag value of 'x'. + + * A header block in ustar format that describes the file. Any fields + defined in the preceding optional extended header records override the + associated fields in this header block for this file. + + * Zero or more blocks that contain the contents of the file. + + Each tar member must be contiguously stored in a lzip member for the +parallel decoding operations like '--list' to work. If any tar member is +split over two or more lzip members, the archive must be decoded +sequentially. *Note Multi-threaded decoding::. + + At the end of the archive file there are two 512-byte blocks filled with +binary zeros, interpreted as an end-of-archive indicator. These EOA blocks +are either compressed in a separate lzip member or compressed along with the +tar members contained in the last lzip member. For a compressed archive to +be recognized by tarlz as appendable, the last lzip member must contain +between 512 and 32256 zeros alone. + + The diagram below shows the correspondence between each tar member +(formed by one or two headers plus optional data) in the tar archive and +each lzip member in the resulting multimember tar.lz archive, when per file +compression is used: *Note File format: (lzip)File format. + +tar ++========+======+=================+===============+========+======+========+ +| header | data | extended header | extended data | header | data | EOA | ++========+======+=================+===============+========+======+========+ + +tar.lz ++===============+=================================================+========+ +| member | member | member | ++===============+=================================================+========+ + + +4.1 Pax header block +==================== + +The pax header block is identical to the ustar header block described below +except that the typeflag has the value 'x' (extended). The field 'size' is +the size of the extended header data in bytes. Most other fields in the pax +header block are zeroed on archive creation to prevent trouble if the +archive is read by an ustar tool, and are ignored by tarlz on archive +extraction. *Note flawed-compat::. + + The pax extended header data consists of one or more records, each of +them constructed as follows: +'"%d %s=%s\n", , , ' + + The fields and in the record must be limited to the +portable character set (*note Portable character set::). The field +contains the decimal length of the record in bytes, including the trailing +newline. The field is stored as-is, without conversion to UTF-8 nor +any other transformation. The fields are separated by the ASCII characters +space, equal-sign, and newline. + + These are the values currently supported by tarlz: + +'atime' + The signed decimal representation of the access time of the following + file in seconds since (or before) the epoch, obtained from the function + 'stat'. The atime record is created only for files with a modification + time outside of the ustar range. *Note ustar-mtime::. + +'gid' + The unsigned decimal representation of the group ID of the group that + owns the following file. The gid record is created only for files with + a group ID greater than 2_097_151 (octal 7777777). *Note + ustar-uid-gid::. + +'linkpath' + The file name of a link being created to another file, of any type, + previously archived. This record overrides the field 'linkname' in the + following ustar header block. The following ustar header block + determines the type of link created. If typeflag of the following + header block is 1, it will be a hard link. If typeflag is 2, it will + be a symbolic link and the linkpath value will be used as the contents + of the symbolic link. The linkpath record is created only for links + with a link name that does not fit in the space provided by the ustar + header. + +'mtime' + The signed decimal representation of the modification time of the + following file in seconds since (or before) the epoch, obtained from + the function 'stat'. This record overrides the field 'mtime' in the + following ustar header block. The mtime record is created only for + files with a modification time outside of the ustar range. *Note + ustar-mtime::. + +'path' + The file name of the following file. This record overrides the fields + 'name' and 'prefix' in the following ustar header block. The path + record is created for files with a name that does not fit in the space + provided by the ustar header, but is also created for files that + require any other extended record so that the fields 'name' and + 'prefix' in the following ustar header block can be zeroed. + +'size' + The size of the file in bytes, expressed as a decimal number using + digits from the ISO/IEC 646:1991 (ASCII) standard. This record + overrides the field 'size' in the following ustar header block. The + size record is created only for files with a size value greater than + 8_589_934_591 (octal 77777777777); that is, 8 GiB (2^33 bytes) or + larger. + +'uid' + The unsigned decimal representation of the user ID of the file owner + of the following file. The uid record is created only for files with a + user ID greater than 2_097_151 (octal 7777777). *Note ustar-uid-gid::. + +'GNU.crc32' + CRC32-C (Castagnoli) of the extended header data excluding the 8 bytes + representing the CRC itself. The is represented as 8 + hexadecimal digits in big endian order, '22 GNU.crc32=00000000\n'. The + keyword of the CRC record is protected by the CRC to guarante that + corruption is always detected when using '--missing-crc' (except in + case of CRC collision). A CRC was chosen because a checksum is too + weak for a potentially large list of variable sized records. A + checksum can't detect simple errors like the swapping of two bytes. + + + At verbosity level 1 or higher tarlz prints a diagnostic for each unknown +extended header keyword found in an archive, once per keyword. + + +4.2 Ustar header block +====================== + +The ustar header block has a length of 512 bytes and is structured as shown +in the following table. All lengths and offsets are in decimal. + +Field Name Offset Length (in bytes) +name 0 100 +mode 100 8 +uid 108 8 +gid 116 8 +size 124 12 +mtime 136 12 +chksum 148 8 +typeflag 156 1 +linkname 157 100 +magic 257 6 +version 263 2 +uname 265 32 +gname 297 32 +devmajor 329 8 +devminor 337 8 +prefix 345 155 + + All characters in the header block are coded using the ISO/IEC 646:1991 +(ASCII) standard, except in fields storing names for files, users, and +groups. For maximum portability between implementations, names should only +contain characters from the portable character set (*note Portable +character set::), but if an implementation supports the use of characters +outside of '/' and the portable character set in names for files, users, +and groups, tarlz will use the byte values in these names unmodified. + + The fields 'name', 'linkname', and 'prefix' are null-terminated +character strings except when all characters in the array contain non-null +characters including the last character. + + The fields 'name' and 'prefix' produce the file name. A new file name is +formed, if prefix is not an empty string (its first character is not null), +by concatenating prefix (up to the first null character), a slash +character, and name; otherwise, name is used alone. In either case, name is +terminated at the first null character. If prefix begins with a null +character, it is ignored. In this manner, file names of at most 256 +characters can be supported. If a file name does not fit in the space +provided, an extended record is used to store the file name. + + The field 'linkname' does not use the prefix to produce a file name. If +the link name does not fit in the 100 characters provided, an extended +record is used to store the link name. + + The field 'mode' provides 12 access permission bits. The following table +shows the symbolic name of each bit and its octal value: + +Bit Name Value Bit Name Value Bit Name Value +--------------------------------------------------- +S_ISUID 04000 S_ISGID 02000 S_ISVTX 01000 +S_IRUSR 00400 S_IWUSR 00200 S_IXUSR 00100 +S_IRGRP 00040 S_IWGRP 00020 S_IXGRP 00010 +S_IROTH 00004 S_IWOTH 00002 S_IXOTH 00001 + + The fields 'uid' and 'gid' are the user and group IDs of the owner and +group of the file, respectively. If the file uid or gid are greater than +2_097_151 (octal 7777777), an extended record is used to store the uid or +gid. + + The field 'size' contains the octal representation of the size of the +file in bytes. If the field 'typeflag' specifies a file of type '0' +(regular file) or '7' (high performance regular file), the number of logical +records following the header is (size / 512) rounded to the next integer. +For all other values of typeflag, tarlz either sets the size field to 0 or +ignores it, and does not store or expect any logical records following the +header. If the file size is larger than 8_589_934_591 bytes +(octal 77777777777), an extended record is used to store the file size. + + The field 'mtime' contains the octal representation of the modification +time of the file at the time it was archived, obtained from the function +'stat'. If the modification time is negative or larger than 8_589_934_591 +(octal 77777777777) seconds since the epoch, an extended record is used to +store the modification time. The ustar range of mtime goes from +'1970-01-01 00:00:00 UTC' to '2242-03-16 12:56:31 UTC'. + + The field 'chksum' contains the octal representation of the value of the +simple sum of all bytes in the header logical record. Each byte in the +header is treated as an unsigned value. When calculating the checksum, the +chksum field is treated as if it were all space characters. + + The field 'typeflag' contains a single character specifying the type of +file archived: + +''0'' + Regular file. + +''1'' + Hard link to another file, of any type, previously archived. Hard + links must not contain file data. + +''2'' + Symbolic link. + +''3', '4'' + Character special file and block special file respectively. In this + case the fields 'devmajor' and 'devminor' contain information defining + the device in unspecified format. + +''5'' + Directory. + +''6'' + FIFO special file. + +''7'' + Reserved to represent a file to which an implementation has associated + some high-performance attribute (contiguous file). Tarlz treats this + type of file as a regular file (type 0). + + + The field 'magic' contains the ASCII null-terminated string "ustar". The +field 'version' contains the characters "00" (0x30,0x30). The fields +'uname' and 'gname' are null-terminated character strings except when all +characters in the array contain non-null characters including the last +character. Each numeric field contains a leading space- or zero-filled, +optionally null-terminated octal number using digits from the ISO/IEC +646:1991 (ASCII) standard. Tarlz is able to decode numeric fields 1 byte +longer than standard ustar by not requiring a terminating null character. + + +File: tarlz.info, Node: Amendments to pax format, Next: Program design, Prev: File format, Up: Top + +5 The reasons for the differences with pax +****************************************** + +Tarlz creates safe archives that allow the reliable detection of invalid or +corrupt metadata during decoding even when the integrity checking of lzip +can't be used because the lzip members are only decompressed partially, as +it happens in parallel '--diff', '--list', and '--extract'. In order to +achieve this goal and avoid some other flaws in the pax format, tarlz makes +some changes to the variant of the pax format that it uses. This chapter +describes these changes and the concrete reasons to implement them. + + +5.1 Add a CRC of the extended records +===================================== + +The POSIX pax format has a serious flaw. The metadata stored in pax extended +records are not protected by any kind of check sequence. Corruption in a +long file name may cause the extraction of the file in the wrong place +without warning. Corruption in a large file size may cause the truncation of +the file or the appending of garbage to the file, both followed by a +spurious warning about a corrupt header far from the place of the undetected +corruption. + + Metadata like file name and file size must be always protected in an +archive format because of the adverse effects of undetected corruption in +them, potentially much worse that undetected corruption in the data. Even +more so in the case of pax because the amount of metadata it stores is +potentially large, making undetected corruption and archiver misbehavior +more probable. + + Headers and metadata must be protected separately from data because the +integrity checking of lzip may not be able to detect the corruption before +the metadata has been used, for example, to create a new file in the wrong +place. + + Because of the above, tarlz protects the extended records with a Cyclic +Redundancy Check (CRC) in a way compatible with standard tar tools. *Note +key_crc32::. + + +5.2 Remove flawed backward compatibility +======================================== + +In order to allow the extraction of pax archives by a tar utility conforming +to the POSIX-2:1993 standard, POSIX.1-2008 recommends selecting extended +header field values that allow such tar to create a regular file containing +the extended header records as data. This approach is broken because if the +extended header is needed because of a long file name, the fields 'name' +and 'prefix' will be unable to contain the full file name. (Some tar +implementations store the truncated name in the field 'name' alone, +truncating the name to only 100 bytes instead of 256). Therefore the files +corresponding to both the extended header and the overridden ustar header +will be extracted using truncated file names, perhaps overwriting existing +files or directories. It may be a security risk to extract a file with a +truncated file name. + + To avoid this problem, tarlz writes extended headers with all fields +zeroed except 'size' (which contains the size of the extended records), +'chksum', 'typeflag', 'magic', and 'version'. In particular, tarlz sets the +fields 'name' and 'prefix' to zero. This prevents old tar programs from +extracting the extended records as a file in the wrong place. Tarlz also +sets to zero those fields of the ustar header overridden by extended +records. Finally, tarlz skips members with zeroed 'name' and 'prefix' when +decoding, except when listing. This is needed to detect certain format +violations during parallel extraction. + + If an extended header is required for any reason (for example a file +size of 8 GiB or larger, or a link name longer than 100 bytes), tarlz also +moves the file name to the extended records to prevent an ustar tool from +trying to extract the file or link. This also makes easier during parallel +decoding the detection of a tar member split between two lzip members at +the boundary between the extended header and the ustar header. + + +5.3 As simple as possible (but not simpler) +=========================================== + +The tarlz format is mainly ustar. Extended pax headers are used only when +needed because the length of a file name or link name, or the size or other +attribute of a file exceed the limits of the ustar format. Adding 1 KiB of +extended header and records to each member just to save subsecond +timestamps seems wasteful for a backup format. Moreover, minimizing the +overhead may help recovering the archive with lziprecover in case of +corruption. + + Global pax headers are tolerated, but not supported; they are parsed and +ignored. Some operations may not behave as expected if the archive contains +global headers. + + +5.4 Improve reproducibility +=========================== + +Pax includes by default the process ID of the pax process in the ustar name +of the extended headers, making the archive not reproducible. Tarlz stores +the true name of the file just once, either in the ustar header or in the +extended records, making it easier to produce reproducible archives. + + Pax allows an extended record to have length x-1 or x if x is a power of +ten; '99<97_bytes>' or '100<97_bytes>'. Tarlz minimizes the length of the +record and always produces a length of x-1 in these cases. + + +5.5 No data in hard links +========================= + +Tarlz does not allow data in hard link members. The data (if any) must be in +the member determining the type of the file (which can't be a link). If all +the names of a file are stored as hard links, the type of the file is lost. +Not allowing data in hard links also prevents invalid actions like +extracting file data for a hard link to a symbolic link or to a directory. + + +5.6 Avoid misconversions to/from UTF-8 +====================================== + +There is no portable way to tell what charset a text string is coded into. +Therefore, tarlz stores all fields representing text strings unmodified, +without conversion to UTF-8 nor any other transformation. This prevents +accidental double UTF-8 conversions. If the need arises this behavior will +be adjusted with a command line option in the future. + + +File: tarlz.info, Node: Program design, Next: Multi-threaded decoding, Prev: Amendments to pax format, Up: Top + +6 Internal structure of tarlz +***************************** + +The parts of tarlz related to sequential processing of the archive are more +or less similar to any other tar and won't be described here. The +interesting parts described here are those related to Multi-threaded +processing. + + The structure of the part of tarlz performing Multi-threaded archive +creation is somewhat similar to that of plzip with the added complication +of the solidity levels. *Note Program design: (plzip)Program design. A +grouper thread and several worker threads are created, acting the main +thread as muxer (multiplexer) thread. A "packet courier" takes care of data +transfers among threads and limits the maximum number of data blocks +(packets) being processed simultaneously. + + The grouper traverses the directory tree, groups together the metadata of +the files to be archived in each lzip member, and distributes them to the +workers. The workers compress the metadata received from the grouper along +with the file data read from the file system. The muxer collects processed +packets from the workers, and writes them to the archive. + +.--------. +| data|---> to each worker below +| | .------------. +| file | ,-->| worker 0 |--, +| system | | `------------' | +| | .---------. | .------------. | .-------. .---------. +|metadata|--->| grouper |-+-->| worker 1 |--+-->| muxer |-->| archive | +`--------' `---------' | `------------' | `-------' `---------' + | ... | + | .------------. | + `-->| worker N-1 |--' + `------------' + + Decoding an archive is somewhat similar to how plzip decompresses a +regular file to standard output, with the differences that it is not the +data but only messages what is written to stdout/stderr, and that each +worker may access files in the file system either to read them (diff) or +write them (extract). As in plzip, each worker reads members directly from +the archive. + +.--------. +| file |<---> data to/from each worker below +| system | +`--------' .------------. + ,-->| worker 0 |--, + | `------------' | +.---------. | .------------. | .-------. .--------. +| archive |-+-->| worker 1 |--+-->| muxer |-->| stdout | +`---------' | `------------' | `-------' | stderr | + | ... | `--------' + | .------------. | + `-->| worker N-1 |--' + `------------' + + As misaligned tar.lz archives can't be decoded in parallel, and the +misalignment can't be detected until after decoding has started, a +"mastership request" mechanism has been designed that allows the decoding to +continue instead of signalling an error. + + During parallel decoding, if a worker finds a misalignment, it requests +mastership to decode the rest of the archive. When mastership is requested, +an error_member_id is set, and all subsequently received packets with +member_id > error_member_id are rejected. All workers requesting mastership +are blocked at the request_mastership call until mastership is granted. +Mastership is granted to the delivering worker when its queue is empty to +make sure that all preceding packets have been processed. When mastership is +granted, all packets are deleted and all subsequently received packets not +coming from the master are rejected. + + If a worker can't continue decoding for any cause (for example lack of +memory or finding a split tar member at the beginning of a lzip member), it +requests mastership to print an error and terminate the program. Only if +some other worker requests mastership in a previous lzip member can this +error be avoided. + + +File: tarlz.info, Node: Multi-threaded decoding, Next: Minimum archive sizes, Prev: Program design, Up: Top + +7 Limitations of parallel tar decoding +************************************** + +Safely decoding an arbitrary tar archive in parallel is only possible if one +decodes the headers sequentially first. For example, if a tar archive +containing another tar archive is decoded starting from some position other +than the beginning, there is no way to know if the first header found there +belongs to the outer tar archive or to the inner tar archive. Tar is a +format inherently serial; it was designed for tapes. + + The pax format is even more serial than the ustar format. Two headers +need to be decoded sequentially for each file. The extended header may even +need parsing to reveal something as basic as file size. If a thread decodes +the ustar header skipping the preceding extended header, it may extract a +file of incorrect size at the wrong place. Moreover, a pax archive with +global headers can't be decoded in parallel because each thread can't know +about the global headers decoded by other threads. + + In the case of compressed tar archives, the start of each compressed +block determines one point through which the tar archive can be decoded in +parallel. Therefore, in tar.lz archives the decoding operations can't be +parallelized if the tar members are not aligned with the lzip members. Tar +archives compressed with plzip can't be decoded in parallel because tar and +plzip do not have a way to align both sets of members. Certainly one can +decompress one such archive with a multi-threaded tool like plzip, but the +increase in speed is not as large as it could be because plzip must +serialize the decompressed data and pass them to tar, which decodes them +sequentially, one tar member at a time. + + On the other hand, if the tar.lz archive is created with a tool like +tarlz, which can guarantee the alignment between tar members and lzip +members because it controls both archiving and compression, then the lzip +format becomes an indexed layer on top of the tar archive which makes +possible decoding it safely in parallel. + + Tarlz is able to automatically decode aligned and unaligned multimember +tar.lz archives, keeping backwards compatibility. If tarlz finds a member +misalignment during multi-threaded decoding, it switches to single-threaded +mode and continues decoding the archive. + + If the files in the archive are large, multi-threaded '--list' on a +regular (seekable) tar.lz archive can be hundreds of times faster than +sequential '--list' because, in addition to using several processors, it +only needs to decompress part of each lzip member. See the following +example listing the Silesia corpus on a dual core machine: + + tarlz -9 --no-solid -cf silesia.tar.lz silesia + time lzip -cd silesia.tar.lz | tar -tf - (5.032s) + time plzip -cd silesia.tar.lz | tar -tf - (3.256s) + time tarlz -tf silesia.tar.lz (0.020s) + + On the other hand, multi-threaded '--list' won't detect corruption in +the tar member data because it only decodes the part of each lzip member +corresponding to the tar member header. This is another reason why the tar +headers must provide their own integrity checking. + + +7.1 Limitations of multi-threaded extraction +============================================ + +Multi-threaded extraction may produce different output than single-threaded +extraction in some cases: + + During multi-threaded extraction, several independent threads are +simultaneously reading the archive and creating files in the file system. +The archive is not read sequentially. As a consequence, any error or +weirdness in the archive (like a corrupt member or an end-of-archive block +in the middle of the archive) won't be usually detected until part of the +archive beyond that point has been processed. + + If the archive contains two or more tar members with the same name, +single-threaded extraction extracts the members in the order they appear in +the archive and leaves in the file system the last version of the file. But +multi-threaded extraction may extract the members in any order and leave in +the file system any version of the file nondeterministically. It is +unspecified which of the tar members is extracted. + + If the same file is extracted through several paths (different member +names resolve to the same file in the file system), the result is undefined. +(Probably the resulting file will be mangled). + + Extraction of a hard link may fail if it is extracted before the file it +links to. + + +File: tarlz.info, Node: Minimum archive sizes, Next: Examples, Prev: Multi-threaded decoding, Up: Top + +8 Minimum archive sizes required for multi-threaded block compression +********************************************************************* + +When creating or appending to a compressed archive using multi-threaded +block compression, tarlz puts tar members together in blocks and compresses +as many blocks simultaneously as worker threads are chosen, creating a +multimember compressed archive. + + For this to work as expected (and roughly multiply the compression speed +by the number of available processors), the uncompressed archive must be at +least as large as the number of worker threads times the block size (*note +--data-size::). Else some processors will not get any data to compress, and +compression will be proportionally slower. The maximum speed increase +achievable on a given archive is limited by the ratio +(uncompressed_size / data_size). For example, a tarball the size of gcc or +linux will scale up to 10 or 14 processors at level -9. + + The following table shows the minimum uncompressed archive size needed +for full use of N processors at a given compression level, using the default +data size for each level: + +Processors 2 4 8 16 64 256 +------------------------------------------------------------------ +Level +-0 2 MiB 4 MiB 8 MiB 16 MiB 64 MiB 256 MiB +-1 4 MiB 8 MiB 16 MiB 32 MiB 128 MiB 512 MiB +-2 6 MiB 12 MiB 24 MiB 48 MiB 192 MiB 768 MiB +-3 8 MiB 16 MiB 32 MiB 64 MiB 256 MiB 1 GiB +-4 12 MiB 24 MiB 48 MiB 96 MiB 384 MiB 1.5 GiB +-5 16 MiB 32 MiB 64 MiB 128 MiB 512 MiB 2 GiB +-6 32 MiB 64 MiB 128 MiB 256 MiB 1 GiB 4 GiB +-7 64 MiB 128 MiB 256 MiB 512 MiB 2 GiB 8 GiB +-8 96 MiB 192 MiB 384 MiB 768 MiB 3 GiB 12 GiB +-9 128 MiB 256 MiB 512 MiB 1 GiB 4 GiB 16 GiB + + +File: tarlz.info, Node: Examples, Next: Problems, Prev: Minimum archive sizes, Up: Top + +9 A small tutorial with examples +******************************** + +Example 1: Create a multimember compressed archive 'archive.tar.lz' +containing files 'a', 'b' and 'c'. + + tarlz -cf archive.tar.lz a b c + + +Example 2: Append files 'd' and 'e' to the multimember compressed archive +'archive.tar.lz'. + + tarlz -rf archive.tar.lz d e + + +Example 3: Create a solidly compressed appendable archive 'archive.tar.lz' +containing files 'a', 'b' and 'c'. Then append files 'd' and 'e' to the +archive. + + tarlz --asolid -cf archive.tar.lz a b c + tarlz --asolid -rf archive.tar.lz d e + + +Example 4: Create a compressed appendable archive containing directories +'dir1', 'dir2' and 'dir3' with a separate lzip member per directory. Then +append files 'a', 'b', 'c', 'd' and 'e' to the archive, all of them +contained in a single lzip member. The resulting archive 'archive.tar.lz' +contains 5 lzip members (including the end-of-archive member). + + tarlz --dsolid -cf archive.tar.lz dir1 dir2 dir3 + tarlz --asolid -rf archive.tar.lz a b c d e + + +Example 5: Create a solidly compressed archive 'archive.tar.lz' containing +files 'a', 'b' and 'c'. Note that no more files can be later appended to +the archive. + + tarlz --solid -cf archive.tar.lz a b c + + +Example 6: Extract all files from archive 'archive.tar.lz'. + + tarlz -xf archive.tar.lz + + +Example 7: Extract files 'a' and 'c', and the whole tree under directory +'dir1' from archive 'archive.tar.lz'. + + tarlz -xf archive.tar.lz a c dir1 + + +Example 8: Copy the contents of directory 'sourcedir' to the directory +'destdir'. + + tarlz -C sourcedir --uncompressed -cf - . | tarlz -C destdir -xf - + + +Example 9: Compress the existing POSIX archive 'archive.tar' and write the +output to 'archive.tar.lz'. Compress each member individually for maximum +availability. (If one member in the compressed archive gets damaged, the +other members can still be extracted). + + tarlz -z --no-solid archive.tar + + +Example 10: Compress the archive 'archive.tar' and write the output to +'foo.tar.lz'. + + tarlz -z -o foo.tar.lz archive.tar + + +Example 11: Concatenate and compress two archives 'archive1.tar' and +'archive2.tar', and write the output to 'foo.tar.lz'. + + tarlz -A archive1.tar archive2.tar | tarlz -z -o foo.tar.lz + + +File: tarlz.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top + +10 Reporting bugs +***************** + +There are probably bugs in tarlz. There are certainly errors and omissions +in this manual. If you report them, they will get fixed. If you don't, no +one will ever know about them and they will remain unfixed for all +eternity, if not longer. + + If you find a bug in tarlz, please send electronic mail to +. Include the version number, which you can find by +running 'tarlz --version' and 'tarlz -v --check-lib'. + + +File: tarlz.info, Node: Concept index, Prev: Problems, Up: Top + +Concept index +************* + +[index] +* Menu: + +* Amendments to pax format: Amendments to pax format. (line 6) +* bugs: Problems. (line 6) +* examples: Examples. (line 6) +* file format: File format. (line 6) +* getting help: Problems. (line 6) +* introduction: Introduction. (line 6) +* invoking: Invoking tarlz. (line 6) +* minimum archive sizes: Minimum archive sizes. (line 6) +* options: Invoking tarlz. (line 6) +* parallel tar decoding: Multi-threaded decoding. (line 6) +* portable character set: Portable character set. (line 6) +* program design: Program design. (line 6) +* usage: Invoking tarlz. (line 6) +* version: Invoking tarlz. (line 6) + + + +Tag Table: +Node: Top216 +Node: Introduction1210 +Node: Invoking tarlz4029 +Ref: --data-size12880 +Ref: --bsolid17192 +Node: Portable character set22788 +Node: File format23431 +Ref: key_crc3230188 +Ref: ustar-uid-gid33452 +Ref: ustar-mtime34254 +Node: Amendments to pax format36254 +Ref: crc3236963 +Ref: flawed-compat38274 +Node: Program design42364 +Node: Multi-threaded decoding46289 +Ref: mt-extraction49570 +Node: Minimum archive sizes50876 +Node: Examples53014 +Node: Problems55381 +Node: Concept index55936 + +End Tag Table + + +Local Variables: +coding: iso-8859-15 +End: diff --git a/doc/tarlz.texi b/doc/tarlz.texi new file mode 100644 index 0000000..5bdd2af --- /dev/null +++ b/doc/tarlz.texi @@ -0,0 +1,1338 @@ +\input texinfo @c -*-texinfo-*- +@c %**start of header +@setfilename tarlz.info +@documentencoding ISO-8859-15 +@settitle Tarlz Manual +@finalout +@c %**end of header + +@set UPDATED 23 September 2022 +@set VERSION 0.23 + +@dircategory Archiving +@direntry +* Tarlz: (tarlz). Archiver with multimember lzip compression +@end direntry + + +@ifnothtml +@titlepage +@title Tarlz +@subtitle Archiver with multimember lzip compression +@subtitle for Tarlz version @value{VERSION}, @value{UPDATED} +@author by Antonio Diaz Diaz + +@page +@vskip 0pt plus 1filll +@end titlepage + +@contents +@end ifnothtml + +@ifnottex +@node Top +@top + +This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}). + +@menu +* Introduction:: Purpose and features of tarlz +* Invoking tarlz:: Command line interface +* Portable character set:: POSIX portable filename character set +* File format:: Detailed format of the compressed archive +* Amendments to pax format:: The reasons for the differences with pax +* Program design:: Internal structure of tarlz +* Multi-threaded decoding:: Limitations of parallel tar decoding +* Minimum archive sizes:: Sizes required for full multi-threaded speed +* Examples:: A small tutorial with examples +* Problems:: Reporting bugs +* Concept index:: Index of concepts +@end menu + +@sp 1 +Copyright @copyright{} 2013-2022 Antonio Diaz Diaz. + +This manual is free documentation: you have unlimited permission to copy, +distribute, and modify it. +@end ifnottex + + +@node Introduction +@chapter Introduction +@cindex introduction + +@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a massively parallel +(multi-threaded) combined implementation of the tar archiver and the +@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. Tarlz uses the +compression library @uref{http://www.nongnu.org/lzip/lzlib.html,,lzlib}. + +Tarlz creates tar archives using a simplified and safer variant of the POSIX +pax format compressed in lzip format, keeping the alignment between tar +members and lzip members. The resulting multimember tar.lz archive is fully +backward compatible with standard tar tools like GNU tar, which treat it +like any other tar.lz archive. Tarlz can append files to the end of such +compressed archives. + +Keeping the alignment between tar members and lzip members has two +advantages. It adds an indexed lzip layer on top of the tar archive, making +it possible to decode the archive safely in parallel. It also minimizes the +amount of data lost in case of corruption. Compressing a tar archive with +plzip may even double the amount of files lost for each lzip member damaged +because it does not keep the members aligned. + +Tarlz can create tar archives with five levels of compression granularity: +per file (---no-solid), per block (---bsolid, default), per directory +(---dsolid), appendable solid (---asolid), and solid (---solid). It can also +create uncompressed tar archives. + +@noindent +Of course, compressing each file (or each directory) individually can't +achieve a compression ratio as high as compressing solidly the whole tar +archive, but it has the following advantages: + +@itemize @bullet +@item +The resulting multimember tar.lz archive can be decompressed in +parallel, multiplying the decompression speed. + +@item +New members can be appended to the archive (by removing the +end-of-archive member), and unwanted members can be deleted from the +archive. Just like an uncompressed tar archive. + +@item +It is a safe POSIX-style backup format. In case of corruption, tarlz +can extract all the undamaged members from the tar.lz archive, +skipping over the damaged members, just like the standard +(uncompressed) tar. Moreover, the option @samp{--keep-damaged} can be used +to recover as much data as possible from each damaged member, and +lziprecover can be used to recover some of the damaged members. + +@item +A multimember tar.lz archive is usually smaller than the corresponding +solidly compressed tar.gz archive, except when individually +compressing files smaller than about @w{32 KiB}. +@end itemize + +Tarlz protects the extended records with a Cyclic Redundancy Check (CRC) in +a way compatible with standard tar tools. @xref{crc32}. + +Tarlz does not understand other tar formats like @samp{gnu}, @samp{oldgnu}, +@samp{star} or @samp{v7}. The command +@w{@samp{tarlz -tf archive.tar.lz > /dev/null}} can be used to verify that +the format of the archive is compatible with tarlz. + + +@node Invoking tarlz +@chapter Invoking tarlz +@cindex invoking +@cindex options +@cindex usage +@cindex version + +The format for running tarlz is: + +@example +tarlz @var{operation} [@var{options}] [@var{files}] +@end example + +@noindent +All operations except @samp{--concatenate} and @samp{--compress} operate on +whole trees if any @var{file} is a directory. All operations except +@samp{--compress} overwrite output files without warning. If no archive is +specified, tarlz tries to read it from standard input or write it to +standard output. Tarlz refuses to read archive data from a terminal or write +archive data to a terminal. Tarlz detects when the archive being created or +enlarged is among the files to be archived, appended, or concatenated, and +skips it. + +Tarlz does not use absolute file names nor file names above the current +working directory (perhaps changed by option @samp{-C}). On archive creation +or appending tarlz archives the files specified, but removes from member +names any leading and trailing slashes and any file name prefixes containing +a @samp{..} component. On extraction, leading and trailing slashes are also +removed from member names, and archive members containing a @samp{..} +component in the file name are skipped. Tarlz does not follow symbolic links +during extraction; not even symbolic links replacing intermediate +directories. + +On extraction and listing, tarlz removes leading @samp{./} strings from +member names in the archive or given in the command line, so that +@w{@samp{tarlz -xf foo ./bar baz}} extracts members @samp{bar} and +@samp{./baz} from archive @samp{foo}. + +If several compression levels or @samp{--*solid} options are given, the last +setting is used. For example @w{@samp{-9 --solid --uncompressed -1}} is +equivalent to @w{@samp{-1 --solid}}. + +tarlz supports the following operations: + +@table @code +@item --help +Print an informative help message describing the options and exit. + +@item -V +@itemx --version +Print the version number of tarlz on the standard output and exit. +This version number should be included in all bug reports. + +@item -A +@itemx --concatenate +Append one or more archives to the end of an archive. If no archive is +specified with the option @samp{-f}, the input archives are concatenated to +standard output. All the archives involved must be regular (seekable) files, +and must be either all compressed or all uncompressed. Compressed and +uncompressed archives can't be mixed. Compressed archives must be +multimember lzip files with the two end-of-archive blocks plus any zero +padding contained in the last lzip member of each archive. The intermediate +end-of-archive blocks are removed as each new archive is concatenated. If +the archive is uncompressed, tarlz parses tar headers until it finds the +end-of-archive blocks. Exit with status 0 without modifying the archive if +no @var{files} have been specified. + +Concatenating archives containing files in common results in two or more tar +members with the same name in the resulting archive, which may produce +nondeterministic behavior during multi-threaded extraction. +@xref{mt-extraction}. + +@item -c +@itemx --create +Create a new archive from @var{files}. + +@item -d +@itemx --diff +Compare and report differences between archive and file system. For each tar +member in the archive, verify that the corresponding file in the file system +exists and is of the same type (regular file, directory, etc). Report on +standard output the differences found in type, mode (permissions), owner and +group IDs, modification time, file size, file contents (of regular files), +target (of symlinks) and device number (of block/character special files). + +As tarlz removes leading slashes from member names, the option @samp{-C} may +be used in combination with @samp{--diff} when absolute file names were used +on archive creation: @w{@samp{tarlz -C / -d}}. Alternatively, tarlz may be +run from the root directory to perform the comparison. + +@item --delete +Delete files and directories from an archive in place. It currently can +delete only from uncompressed archives and from archives with files +compressed individually (@samp{--no-solid} archives). Note that files of +about @samp{--data-size} or larger are compressed individually even if +@samp{--bsolid} is used, and can therefore be deleted. Tarlz takes care to +not delete a tar member unless it is possible to do so. For example it won't +try to delete a tar member that is not compressed individually. Even in the +case of finding a corrupt member after having deleted some member(s), tarlz +stops and copies the rest of the file as soon as corruption is found, +leaving it just as corrupt as it was, but not worse. + +To delete a directory without deleting the files under it, use +@w{@samp{tarlz --delete -f foo --exclude='dir/*' dir}}. Deleting in place +may be dangerous. A corrupt archive, a power cut, or an I/O error may cause +data loss. + +@item -r +@itemx --append +Append files to the end of an archive. The archive must be a regular +(seekable) file either compressed or uncompressed. Compressed members can't +be appended to an uncompressed archive, nor vice versa. If the archive is +compressed, it must be a multimember lzip file with the two end-of-archive +blocks plus any zero padding contained in the last lzip member of the +archive. It is possible to append files to an archive with a different +compression granularity. Appending works as follows; first the +end-of-archive blocks are removed, then the new members are appended, and +finally two new end-of-archive blocks are appended to the archive. If the +archive is uncompressed, tarlz parses and skips tar headers until it finds +the end-of-archive blocks. Exit with status 0 without modifying the archive +if no @var{files} have been specified. + +Appending files already present in the archive results in two or more tar +members with the same name, which may produce nondeterministic behavior +during multi-threaded extraction. @xref{mt-extraction}. + +@item -t +@itemx --list +List the contents of an archive. If @var{files} are given, list only the +@var{files} given. + +@item -x +@itemx --extract +Extract files from an archive. If @var{files} are given, extract only the +@var{files} given. Else extract all the files in the archive. To extract a +directory without extracting the files under it, use +@w{@samp{tarlz -xf foo --exclude='dir/*' dir}}. Tarlz removes files and +empty directories unconditionally before extracting over them. Other than +that, it will not make any special effort to extract a file over an +incompatible type of file. For example, extracting a file over a non-empty +directory will usually fail. + +@item -z +@itemx --compress +Compress existing POSIX tar archives aligning the lzip members to the tar +members with choice of granularity (---bsolid by default, ---dsolid works +like ---asolid). The input archives are kept unchanged. Existing compressed +archives are not overwritten. A hyphen @samp{-} used as the name of an input +archive reads from standard input and writes to standard output (unless the +option @samp{--output} is used). Tarlz can be used as compressor for GNU tar +using a command like @w{@samp{tar -c -Hustar foo | tarlz -z -o foo.tar.lz}}. +Note that tarlz only works reliably on archives without global headers, or +with global headers whose content can be ignored. + +The compression is reversible, including any garbage present after the +end-of-archive blocks. Tarlz stops parsing after the first end-of-archive +block is found, and then compresses the rest of the archive. Unless solid +compression is requested, the end-of-archive blocks are compressed in a lzip +member separated from the preceding members and from any non-zero garbage +following the end-of-archive blocks. @samp{--compress} implies plzip +argument style, not tar style. Each input archive is compressed to a file +with the extension @samp{.lz} added unless the option @samp{--output} is +used. When @samp{--output} is used, only one input archive can be specified. +@samp{-f} can't be used with @samp{--compress}. + +@item --check-lib +Compare the +@uref{http://www.nongnu.org/lzip/manual/lzlib_manual.html#Library-version,,version of lzlib} +used to compile tarlz with the version actually being used at run time and +exit. Report any differences found. Exit with error status 1 if differences +are found. A mismatch may indicate that lzlib is not correctly installed or +that a different version of lzlib has been installed after compiling tarlz. +Exit with error status 2 if LZ_API_VERSION and LZ_version_string don't +match. @w{@samp{tarlz -v --check-lib}} shows the version of lzlib being used +and the value of LZ_API_VERSION (if defined). +@ifnothtml +@xref{Library version,,,lzlib}. +@end ifnothtml + +@end table + +tarlz supports the following +@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}: +@ifnothtml +@xref{Argument syntax,,,arg_parser}. +@end ifnothtml + +@table @code +@anchor{--data-size} +@item -B @var{bytes} +@itemx --data-size=@var{bytes} +Set target size of input data blocks for the option @samp{--bsolid}. +@xref{--bsolid}. Valid values range from @w{8 KiB} to @w{1 GiB}. Default +value is two times the dictionary size, except for option @samp{-0} where it +defaults to @w{1 MiB}. @xref{Minimum archive sizes}. + +@item -C @var{dir} +@itemx --directory=@var{dir} +Change to directory @var{dir}. When creating or appending, the position of +each @samp{-C} option in the command line is significant; it will change the +current working directory for the following @var{files} until a new +@samp{-C} option appears in the command line. When extracting or comparing, +all the @samp{-C} options are executed in sequence before reading the +archive. Listing ignores any @samp{-C} options specified. @var{dir} is +relative to the then current working directory, perhaps changed by a +previous @samp{-C} option. + +Note that a process can only have one current working directory (CWD). +Therefore multi-threading can't be used to create an archive if a @samp{-C} +option appears after a relative file name in the command line. + +@item -f @var{archive} +@itemx --file=@var{archive} +Use archive file @var{archive}. A hyphen @samp{-} used as an @var{archive} +argument reads from standard input or writes to standard output. + +@item -h +@itemx --dereference +Follow symbolic links during archive creation, appending or comparison. +Archive or compare the files they point to instead of the links themselves. + +@item -n @var{n} +@itemx --threads=@var{n} +Set the number of (de)compression threads, overriding the system's default. +Valid values range from 0 to "as many as your system can support". A value +of 0 disables threads entirely. If this option is not used, tarlz tries to +detect the number of processors in the system and use it as default value. +@w{@samp{tarlz --help}} shows the system's default value. See the note about +multi-threaded archive creation in the option @samp{-C} above. + +Note that the number of usable threads is limited during compression to +@w{ceil( uncompressed_size / data_size )} (@pxref{Minimum archive sizes}), +and during decompression to the number of lzip members in the tar.lz +archive, which you can find by running @w{@samp{lzip -lv archive.tar.lz}}. + +@item -o @var{file} +@itemx --output=@var{file} +Write the compressed output to @var{file}. @w{@samp{-o -}} writes the +compressed output to standard output. Currently @samp{--output} only works +with @samp{--compress}. + +@item -p +@itemx --preserve-permissions +On extraction, set file permissions as they appear in the archive. This is +the default behavior when tarlz is run by the superuser. The default for +other users is to subtract the umask of the user running tarlz from the +permissions specified in the archive. + +@item -q +@itemx --quiet +Quiet operation. Suppress all messages. + +@item -v +@itemx --verbose +Verbosely list files processed. Further -v's (up to 4) increase the +verbosity level. + +@item -0 .. -9 +Set the compression level for @samp{--create}, @samp{--append}, and +@samp{--compress}. The default compression level is @samp{-6}. Like lzip, +tarlz also minimizes the dictionary size of the lzip members it creates, +reducing the amount of memory required for decompression. + +@multitable {Level} {Dictionary size} {Match length limit} +@item Level @tab Dictionary size @tab Match length limit +@item -0 @tab 64 KiB @tab 16 bytes +@item -1 @tab 1 MiB @tab 5 bytes +@item -2 @tab 1.5 MiB @tab 6 bytes +@item -3 @tab 2 MiB @tab 8 bytes +@item -4 @tab 3 MiB @tab 12 bytes +@item -5 @tab 4 MiB @tab 20 bytes +@item -6 @tab 8 MiB @tab 36 bytes +@item -7 @tab 16 MiB @tab 68 bytes +@item -8 @tab 24 MiB @tab 132 bytes +@item -9 @tab 32 MiB @tab 273 bytes +@end multitable + +@item --uncompressed +With @samp{--create}, don't compress the tar archive created. Create an +uncompressed tar archive instead. With @samp{--append}, don't compress the +new members appended to the tar archive. Compressed members can't be +appended to an uncompressed archive, nor vice versa. + +@item --asolid +When creating or appending to a compressed archive, use appendable solid +compression. All the files being added to the archive are compressed into a +single lzip member, but the end-of-archive blocks are compressed into a +separate lzip member. This creates a solidly compressed appendable archive. +Solid archives can't be created nor decoded in parallel. + +@anchor{--bsolid} +@item --bsolid +When creating or appending to a compressed archive, use block compression. +Tar members are compressed together in a lzip member until they approximate +a target uncompressed size. The size can't be exact because each solidly +compressed data block must contain an integer number of tar members. Block +compression is the default because it improves compression ratio for +archives with many files smaller than the block size. This option allows +tarlz revert to default behavior if, for example, it is invoked through an +alias like @w{@samp{tar='tarlz --solid'}}. @xref{--data-size}, to set the +target block size. + +@item --dsolid +When creating or appending to a compressed archive, compress each file +specified in the command line separately in its own lzip member, and use +solid compression for each directory specified in the command line. The +end-of-archive blocks are compressed into a separate lzip member. This +creates a compressed appendable archive with a separate lzip member for each +file or top-level directory specified. + +@item --no-solid +When creating or appending to a compressed archive, compress each file +separately in its own lzip member. The end-of-archive blocks are compressed +into a separate lzip member. This creates a compressed appendable archive +with a lzip member for each file. + +@item --solid +When creating or appending to a compressed archive, use solid compression. +The files being added to the archive, along with the end-of-archive blocks, +are compressed into a single lzip member. The resulting archive is not +appendable. No more files can be later appended to the archive. Solid +archives can't be created nor decoded in parallel. + +@item --anonymous +Equivalent to @w{@samp{--owner=root --group=root}}. + +@item --owner=@var{owner} +When creating or appending, use @var{owner} for files added to the archive. +If @var{owner} is not a valid user name, it is decoded as a decimal numeric +user ID. + +@item --group=@var{group} +When creating or appending, use @var{group} for files added to the archive. +If @var{group} is not a valid group name, it is decoded as a decimal numeric +group ID. + +@item --exclude=@var{pattern} +Exclude files matching a shell pattern like @samp{*.o}. A file is considered +to match if any component of the file name matches. For example, @samp{*.o} +matches @samp{foo.o}, @samp{foo.o/bar} and @samp{foo/bar.o}. If +@var{pattern} contains a @samp{/}, it matches a corresponding @samp{/} in +the file name. For example, @samp{foo/*.o} matches @samp{foo/bar.o}. +Multiple @samp{--exclude} options can be specified. + +@item --ignore-ids +Make @samp{--diff} ignore differences in owner and group IDs. This option is +useful when comparing an @samp{--anonymous} archive. + +@item --ignore-overflow +Make @samp{--diff} ignore differences in mtime caused by overflow on 32-bit +systems with a 32-bit time_t. + +@item --keep-damaged +Don't delete partially extracted files. If a decompression error happens +while extracting a file, keep the partial data extracted. Use this option to +recover as much data as possible from each damaged member. It is recommended +to run tarlz in single-threaded mode (---threads=0) when using this option. + +@item --missing-crc +Exit with error status 2 if the CRC of the extended records is missing. When +this option is used, tarlz detects any corruption in the extended records +(only limited by CRC collisions). But note that a corrupt @samp{GNU.crc32} +keyword, for example @samp{GNU.crc33}, is reported as a missing CRC instead +of as a corrupt record. This misleading @w{@samp{Missing CRC}} message is +the consequence of a flaw in the POSIX pax format; i.e., the lack of a +mandatory check sequence of the extended records. @xref{crc32}. + +@item --mtime=@var{date} +When creating or appending, use @var{date} as the modification time for +files added to the archive instead of their actual modification times. The +value of @var{date} may be either @samp{@@} followed by the number of +seconds since (or before) the epoch, or a date in format +@w{@samp{[-]YYYY-MM-DD HH:MM:SS}} or @samp{[-]YYYY-MM-DDTHH:MM:SS}, or the +name of an existing reference file starting with @samp{.} or @samp{/} whose +modification time is used. The time of day @samp{HH:MM:SS} in the date +format is optional and defaults to @samp{00:00:00}. The epoch is +@w{@samp{1970-01-01 00:00:00 UTC}}. Negative seconds or years define a +modification time before the epoch. + +@item --out-slots=@var{n} +Number of @w{1 MiB} output packets buffered per worker thread during +multi-threaded creation or appending to compressed archives. Increasing the +number of packets may increase compression speed if the files being archived +are larger than @w{64 MiB} compressed, but requires more memory. Valid +values range from 1 to 1024. The default value is 64. + +@item --warn-newer +During archive creation, warn if any file being archived has a modification +time newer than the archive creation time. This option may slow archive +creation somewhat because it makes an extra call to @samp{stat} after +archiving each file, but it guarantees that file contents were not modified +during the creation of the archive. Note that the file must be at least one +second newer than the archive for it to be detected as newer. + +@ignore +@item --permissive +Allow some violations of the archive format, like consecutive extended +headers preceding a ustar header, or several records with the same +keyword appearing in the same block of extended records. +@end ignore + +@end table + +Exit status: 0 for a normal exit, 1 for environmental problems +(file not found, files differ, invalid command line options, I/O errors, +etc), 2 to indicate a corrupt or invalid input file, 3 for an internal +consistency error (e.g., bug) which caused tarlz to panic. + + +@node Portable character set +@chapter POSIX portable filename character set +@cindex portable character set + +The set of characters from which portable file names are constructed. + +@example +A B C D E F G H I J K L M N O P Q R S T U V W X Y Z +a b c d e f g h i j k l m n o p q r s t u v w x y z +0 1 2 3 4 5 6 7 8 9 . _ - +@end example + +The last three characters are the period, underscore, and hyphen-minus +characters, respectively. + +File names are identifiers. Therefore, archiving works better when file +names use only the portable character set without spaces added. + + +@node File format +@chapter File format +@cindex file format + +In the diagram below, a box like this: + +@verbatim ++---+ +| | <-- the vertical bars might be missing ++---+ +@end verbatim + +represents one byte; a box like this: + +@verbatim ++==============+ +| | ++==============+ +@end verbatim + +represents a variable number of bytes or a fixed but large number of +bytes (for example 512). + +@sp 1 +A tar.lz file consists of a series of lzip members (compressed data sets). +The members simply appear one after another in the file, with no additional +information before, between, or after them. + +Each lzip member contains one or more tar members in a simplified POSIX pax +interchange format. The only pax typeflag value supported by tarlz (in +addition to the typeflag values defined by the ustar format) is @samp{x}. +The pax format is an extension on top of the ustar format that removes the +size limitations of the ustar format. + +Each tar member contains one file archived, and is represented by the +following sequence: + +@itemize @bullet +@item +An optional extended header block followed by one or more blocks that +contain the extended header records as if they were the contents of a file; +i.e., the extended header records are included as the data for this header +block. This header block is of the form described in pax header block, with +a typeflag value of @samp{x}. + +@item +A header block in ustar format that describes the file. Any fields defined +in the preceding optional extended header records override the associated +fields in this header block for this file. + +@item +Zero or more blocks that contain the contents of the file. +@end itemize + +Each tar member must be contiguously stored in a lzip member for the +parallel decoding operations like @samp{--list} to work. If any tar member +is split over two or more lzip members, the archive must be decoded +sequentially. @xref{Multi-threaded decoding}. + +At the end of the archive file there are two 512-byte blocks filled with +binary zeros, interpreted as an end-of-archive indicator. These EOA blocks +are either compressed in a separate lzip member or compressed along with the +tar members contained in the last lzip member. For a compressed archive to +be recognized by tarlz as appendable, the last lzip member must contain +between 512 and 32256 zeros alone. + +The diagram below shows the correspondence between each tar member (formed +by one or two headers plus optional data) in the tar archive and each +@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#File-format,,lzip member} +in the resulting multimember tar.lz archive, when per file compression is +used: +@ifnothtml +@xref{File format,,,lzip}. +@end ifnothtml + +@verbatim +tar ++========+======+=================+===============+========+======+========+ +| header | data | extended header | extended data | header | data | EOA | ++========+======+=================+===============+========+======+========+ + +tar.lz ++===============+=================================================+========+ +| member | member | member | ++===============+=================================================+========+ +@end verbatim + +@ignore +When @samp{--permissive} is used, the following violations of the +archive format are allowed:@* +If several extended headers precede an ustar header, only the last +extended header takes effect. The other extended headers are ignored. +Similarly, if several records with the same keyword appear in the same +block of extended records, only the last record for the repeated keyword +takes effect. The other records for the repeated keyword are ignored.@* +A global header inserted between an extended header and an ustar header.@* +An extended header just before the end-of-archive blocks. +@end ignore + +@sp 1 +@section Pax header block + +The pax header block is identical to the ustar header block described below +except that the typeflag has the value @samp{x} (extended). The field +@samp{size} is the size of the extended header data in bytes. Most other +fields in the pax header block are zeroed on archive creation to prevent +trouble if the archive is read by an ustar tool, and are ignored by tarlz on +archive extraction. @xref{flawed-compat}. + +The pax extended header data consists of one or more records, each of +them constructed as follows:@* +@w{@samp{"%d %s=%s\n", , , }} + +The fields and in the record must be limited to the +portable character set (@pxref{Portable character set}). The field +contains the decimal length of the record in bytes, including the trailing +newline. The field is stored as-is, without conversion to UTF-8 nor +any other transformation. The fields are separated by the ASCII characters +space, equal-sign, and newline. + +These are the values currently supported by tarlz: + +@table @code +@item atime +The signed decimal representation of the access time of the following file +in seconds since (or before) the epoch, obtained from the function +@samp{stat}. The atime record is created only for files with a modification +time outside of the ustar range. @xref{ustar-mtime}. + +@item gid +The unsigned decimal representation of the group ID of the group that owns +the following file. The gid record is created only for files with a group ID +greater than 2_097_151 (octal 7777777). @xref{ustar-uid-gid}. + +@item linkpath +The file name of a link being created to another file, of any type, +previously archived. This record overrides the field @samp{linkname} in the +following ustar header block. The following ustar header block determines +the type of link created. If typeflag of the following header block is 1, it +will be a hard link. If typeflag is 2, it will be a symbolic link and the +linkpath value will be used as the contents of the symbolic link. The +linkpath record is created only for links with a link name that does not fit +in the space provided by the ustar header. + +@item mtime +The signed decimal representation of the modification time of the following +file in seconds since (or before) the epoch, obtained from the function +@samp{stat}. This record overrides the field @samp{mtime} in the following +ustar header block. The mtime record is created only for files with a +modification time outside of the ustar range. @xref{ustar-mtime}. + +@item path +The file name of the following file. This record overrides the fields +@samp{name} and @samp{prefix} in the following ustar header block. The path +record is created for files with a name that does not fit in the space +provided by the ustar header, but is also created for files that require any +other extended record so that the fields @samp{name} and @samp{prefix} in +the following ustar header block can be zeroed. + +@item size +The size of the file in bytes, expressed as a decimal number using digits +from the ISO/IEC 646:1991 (ASCII) standard. This record overrides the field +@samp{size} in the following ustar header block. The size record is created +only for files with a size value greater than 8_589_934_591 +@w{(octal 77777777777)}; that is, @w{8 GiB} (2^33 bytes) or larger. + +@item uid +The unsigned decimal representation of the user ID of the file owner of the +following file. The uid record is created only for files with a user ID +greater than 2_097_151 (octal 7777777). @xref{ustar-uid-gid}. + +@anchor{key_crc32} +@item GNU.crc32 +CRC32-C (Castagnoli) of the extended header data excluding the 8 bytes +representing the CRC itself. The is represented as 8 +hexadecimal digits in big endian order, +@w{@samp{22 GNU.crc32=00000000\n}}. The keyword of the CRC record is +protected by the CRC to guarante that corruption is always detected when +using @samp{--missing-crc} (except in case of CRC collision). A CRC was +chosen because a checksum is too weak for a potentially large list of +variable sized records. A checksum can't detect simple errors like the +swapping of two bytes. + +@end table + +At verbosity level 1 or higher tarlz prints a diagnostic for each unknown +extended header keyword found in an archive, once per keyword. + +@sp 1 +@section Ustar header block + +The ustar header block has a length of 512 bytes and is structured as +shown in the following table. All lengths and offsets are in decimal. + +@multitable {Field Name} {Offset} {Length (in bytes)} +@item Field Name @tab Offset @tab Length (in bytes) +@item name @tab 0 @tab 100 +@item mode @tab 100 @tab 8 +@item uid @tab 108 @tab 8 +@item gid @tab 116 @tab 8 +@item size @tab 124 @tab 12 +@item mtime @tab 136 @tab 12 +@item chksum @tab 148 @tab 8 +@item typeflag @tab 156 @tab 1 +@item linkname @tab 157 @tab 100 +@item magic @tab 257 @tab 6 +@item version @tab 263 @tab 2 +@item uname @tab 265 @tab 32 +@item gname @tab 297 @tab 32 +@item devmajor @tab 329 @tab 8 +@item devminor @tab 337 @tab 8 +@item prefix @tab 345 @tab 155 +@end multitable + +All characters in the header block are coded using the ISO/IEC 646:1991 +(ASCII) standard, except in fields storing names for files, users, and +groups. For maximum portability between implementations, names should only +contain characters from the portable character set (@pxref{Portable +character set}), but if an implementation supports the use of characters +outside of @samp{/} and the portable character set in names for files, +users, and groups, tarlz will use the byte values in these names unmodified. + +The fields @samp{name}, @samp{linkname}, and @samp{prefix} are +null-terminated character strings except when all characters in the array +contain non-null characters including the last character. + +The fields @samp{name} and @samp{prefix} produce the file name. A new file +name is formed, if prefix is not an empty string (its first character is not +null), by concatenating prefix (up to the first null character), a slash +character, and name; otherwise, name is used alone. In either case, name is +terminated at the first null character. If prefix begins with a null +character, it is ignored. In this manner, file names of at most 256 +characters can be supported. If a file name does not fit in the space +provided, an extended record is used to store the file name. + +The field @samp{linkname} does not use the prefix to produce a file name. If +the link name does not fit in the 100 characters provided, an extended +record is used to store the link name. + +The field @samp{mode} provides 12 access permission bits. The following +table shows the symbolic name of each bit and its octal value: + +@multitable {Bit Name} {Value} {Bit Name} {Value} {Bit Name} {Value} +@headitem Bit Name @tab Value @tab Bit Name @tab Value @tab Bit Name @tab Value +@item S_ISUID @tab 04000 @tab S_ISGID @tab 02000 @tab S_ISVTX @tab 01000 +@item S_IRUSR @tab 00400 @tab S_IWUSR @tab 00200 @tab S_IXUSR @tab 00100 +@item S_IRGRP @tab 00040 @tab S_IWGRP @tab 00020 @tab S_IXGRP @tab 00010 +@item S_IROTH @tab 00004 @tab S_IWOTH @tab 00002 @tab S_IXOTH @tab 00001 +@end multitable + +@anchor{ustar-uid-gid} +The fields @samp{uid} and @samp{gid} are the user and group IDs of the owner +and group of the file, respectively. If the file uid or gid are greater than +2_097_151 (octal 7777777), an extended record is used to store the uid or gid. + +The field @samp{size} contains the octal representation of the size of the +file in bytes. If the field @samp{typeflag} specifies a file of type '0' +(regular file) or '7' (high performance regular file), the number of logical +records following the header is @w{(size / 512)} rounded to the next +integer. For all other values of typeflag, tarlz either sets the size field +to 0 or ignores it, and does not store or expect any logical records +following the header. If the file size is larger than 8_589_934_591 bytes +@w{(octal 77777777777)}, an extended record is used to store the file size. + +@anchor{ustar-mtime} +The field @samp{mtime} contains the octal representation of the modification +time of the file at the time it was archived, obtained from the function +@samp{stat}. If the modification time is negative or larger than +8_589_934_591 @w{(octal 77777777777)} seconds since the epoch, an extended +record is used to store the modification time. The ustar range of mtime goes +from @w{@samp{1970-01-01 00:00:00 UTC}} to @w{@samp{2242-03-16 12:56:31 UTC}}. + +The field @samp{chksum} contains the octal representation of the value of +the simple sum of all bytes in the header logical record. Each byte in the +header is treated as an unsigned value. When calculating the checksum, the +chksum field is treated as if it were all space characters. + +The field @samp{typeflag} contains a single character specifying the type of +file archived: + +@table @code +@item '0' +Regular file. + +@item '1' +Hard link to another file, of any type, previously archived. Hard links must +not contain file data. + +@item '2' +Symbolic link. + +@item '3', '4' +Character special file and block special file respectively. In this case the +fields @samp{devmajor} and @samp{devminor} contain information defining the +device in unspecified format. + +@item '5' +Directory. + +@item '6' +FIFO special file. + +@item '7' +Reserved to represent a file to which an implementation has associated some +high-performance attribute (contiguous file). Tarlz treats this type of file +as a regular file (type 0). + +@end table + +The field @samp{magic} contains the ASCII null-terminated string "ustar". +The field @samp{version} contains the characters "00" (0x30,0x30). The +fields @samp{uname} and @samp{gname} are null-terminated character strings +except when all characters in the array contain non-null characters +including the last character. Each numeric field contains a leading space- +or zero-filled, optionally null-terminated octal number using digits from +the ISO/IEC 646:1991 (ASCII) standard. Tarlz is able to decode numeric +fields 1 byte longer than standard ustar by not requiring a terminating null +character. + + +@node Amendments to pax format +@chapter The reasons for the differences with pax +@cindex Amendments to pax format + +Tarlz creates safe archives that allow the reliable detection of invalid or +corrupt metadata during decoding even when the integrity checking of lzip +can't be used because the lzip members are only decompressed partially, as +it happens in parallel @samp{--diff}, @samp{--list}, and @samp{--extract}. +In order to achieve this goal and avoid some other flaws in the pax format, +tarlz makes some changes to the variant of the pax format that it uses. This +chapter describes these changes and the concrete reasons to implement them. + +@sp 1 +@anchor{crc32} +@section Add a CRC of the extended records + +The POSIX pax format has a serious flaw. The metadata stored in pax extended +records are not protected by any kind of check sequence. Corruption in a +long file name may cause the extraction of the file in the wrong place +without warning. Corruption in a large file size may cause the truncation of +the file or the appending of garbage to the file, both followed by a +spurious warning about a corrupt header far from the place of the undetected +corruption. + +Metadata like file name and file size must be always protected in an archive +format because of the adverse effects of undetected corruption in them, +potentially much worse that undetected corruption in the data. Even more so +in the case of pax because the amount of metadata it stores is potentially +large, making undetected corruption and archiver misbehavior more probable. + +Headers and metadata must be protected separately from data because the +integrity checking of lzip may not be able to detect the corruption before +the metadata has been used, for example, to create a new file in the wrong +place. + +Because of the above, tarlz protects the extended records with a Cyclic +Redundancy Check (CRC) in a way compatible with standard tar tools. +@xref{key_crc32}. + +@sp 1 +@anchor{flawed-compat} +@section Remove flawed backward compatibility + +In order to allow the extraction of pax archives by a tar utility conforming +to the POSIX-2:1993 standard, POSIX.1-2008 recommends selecting extended +header field values that allow such tar to create a regular file containing +the extended header records as data. This approach is broken because if the +extended header is needed because of a long file name, the fields +@samp{name} and @samp{prefix} will be unable to contain the full file name. +(Some tar implementations store the truncated name in the field @samp{name} +alone, truncating the name to only 100 bytes instead of 256). Therefore the +files corresponding to both the extended header and the overridden ustar +header will be extracted using truncated file names, perhaps overwriting +existing files or directories. It may be a security risk to extract a file +with a truncated file name. + +To avoid this problem, tarlz writes extended headers with all fields zeroed +except @samp{size} (which contains the size of the extended records), +@samp{chksum}, @samp{typeflag}, @samp{magic}, and @samp{version}. In +particular, tarlz sets the fields @samp{name} and @samp{prefix} to zero. +This prevents old tar programs from extracting the extended records as a +file in the wrong place. Tarlz also sets to zero those fields of the ustar +header overridden by extended records. Finally, tarlz skips members with +zeroed @samp{name} and @samp{prefix} when decoding, except when listing. +This is needed to detect certain format violations during parallel +extraction. + +If an extended header is required for any reason (for example a file size of +@w{8 GiB} or larger, or a link name longer than 100 bytes), tarlz also moves +the file name to the extended records to prevent an ustar tool from trying +to extract the file or link. This also makes easier during parallel decoding +the detection of a tar member split between two lzip members at the boundary +between the extended header and the ustar header. + +@sp 1 +@section As simple as possible (but not simpler) + +The tarlz format is mainly ustar. Extended pax headers are used only when +needed because the length of a file name or link name, or the size or other +attribute of a file exceed the limits of the ustar format. Adding @w{1 KiB} +of extended header and records to each member just to save subsecond +timestamps seems wasteful for a backup format. Moreover, minimizing the +overhead may help recovering the archive with lziprecover in case of +corruption. + +Global pax headers are tolerated, but not supported; they are parsed and +ignored. Some operations may not behave as expected if the archive contains +global headers. + +@sp 1 +@section Improve reproducibility + +Pax includes by default the process ID of the pax process in the ustar name +of the extended headers, making the archive not reproducible. Tarlz stores +the true name of the file just once, either in the ustar header or in the +extended records, making it easier to produce reproducible archives. + +Pax allows an extended record to have length x-1 or x if x is a power of +ten; @samp{99<97_bytes>} or @samp{100<97_bytes>}. Tarlz minimizes the length +of the record and always produces a length of x-1 in these cases. + +@sp 1 +@section No data in hard links + +Tarlz does not allow data in hard link members. The data (if any) must be in +the member determining the type of the file (which can't be a link). If all +the names of a file are stored as hard links, the type of the file is lost. +Not allowing data in hard links also prevents invalid actions like +extracting file data for a hard link to a symbolic link or to a directory. + +@sp 1 +@section Avoid misconversions to/from UTF-8 + +There is no portable way to tell what charset a text string is coded into. +Therefore, tarlz stores all fields representing text strings unmodified, +without conversion to UTF-8 nor any other transformation. This prevents +accidental double UTF-8 conversions. If the need arises this behavior will +be adjusted with a command line option in the future. + + +@node Program design +@chapter Internal structure of tarlz +@cindex program design + +The parts of tarlz related to sequential processing of the archive are more +or less similar to any other tar and won't be described here. The interesting +parts described here are those related to Multi-threaded processing. + +The structure of the part of tarlz performing Multi-threaded archive +creation is somewhat similar to that of +@uref{http://www.nongnu.org/lzip/plzip.html#Program-design,,plzip} with the +added complication of the solidity levels. +@ifnothtml +@xref{Program design,,,plzip}. +@end ifnothtml +A grouper thread and several worker threads are created, acting the main +thread as muxer (multiplexer) thread. A "packet courier" takes care of data +transfers among threads and limits the maximum number of data blocks +(packets) being processed simultaneously. + +The grouper traverses the directory tree, groups together the metadata of +the files to be archived in each lzip member, and distributes them to the +workers. The workers compress the metadata received from the grouper along +with the file data read from the file system. The muxer collects processed +packets from the workers, and writes them to the archive. + +@verbatim +.--------. +| data|---> to each worker below +| | .------------. +| file | ,-->| worker 0 |--, +| system | | `------------' | +| | .---------. | .------------. | .-------. .---------. +|metadata|--->| grouper |-+-->| worker 1 |--+-->| muxer |-->| archive | +`--------' `---------' | `------------' | `-------' `---------' + | ... | + | .------------. | + `-->| worker N-1 |--' + `------------' +@end verbatim + +Decoding an archive is somewhat similar to how plzip decompresses a regular +file to standard output, with the differences that it is not the data but +only messages what is written to stdout/stderr, and that each worker may +access files in the file system either to read them (diff) or write them +(extract). As in plzip, each worker reads members directly from the archive. + +@verbatim +.--------. +| file |<---> data to/from each worker below +| system | +`--------' .------------. + ,-->| worker 0 |--, + | `------------' | +.---------. | .------------. | .-------. .--------. +| archive |-+-->| worker 1 |--+-->| muxer |-->| stdout | +`---------' | `------------' | `-------' | stderr | + | ... | `--------' + | .------------. | + `-->| worker N-1 |--' + `------------' +@end verbatim + +As misaligned tar.lz archives can't be decoded in parallel, and the +misalignment can't be detected until after decoding has started, a +"mastership request" mechanism has been designed that allows the decoding to +continue instead of signalling an error. + +During parallel decoding, if a worker finds a misalignment, it requests +mastership to decode the rest of the archive. When mastership is requested, +an error_member_id is set, and all subsequently received packets with +member_id > error_member_id are rejected. All workers requesting mastership +are blocked at the request_mastership call until mastership is granted. +Mastership is granted to the delivering worker when its queue is empty to +make sure that all preceding packets have been processed. When mastership is +granted, all packets are deleted and all subsequently received packets not +coming from the master are rejected. + +If a worker can't continue decoding for any cause (for example lack of +memory or finding a split tar member at the beginning of a lzip member), it +requests mastership to print an error and terminate the program. Only if +some other worker requests mastership in a previous lzip member can this +error be avoided. + + +@node Multi-threaded decoding +@chapter Limitations of parallel tar decoding +@cindex parallel tar decoding + +Safely decoding an arbitrary tar archive in parallel is only possible if one +decodes the headers sequentially first. For example, if a tar archive +containing another tar archive is decoded starting from some position other +than the beginning, there is no way to know if the first header found there +belongs to the outer tar archive or to the inner tar archive. Tar is a +format inherently serial; it was designed for tapes. + +The pax format is even more serial than the ustar format. Two headers need +to be decoded sequentially for each file. The extended header may even need +parsing to reveal something as basic as file size. If a thread decodes the +ustar header skipping the preceding extended header, it may extract a file +of incorrect size at the wrong place. Moreover, a pax archive with global +headers can't be decoded in parallel because each thread can't know about +the global headers decoded by other threads. + +In the case of compressed tar archives, the start of each compressed block +determines one point through which the tar archive can be decoded in +parallel. Therefore, in tar.lz archives the decoding operations can't be +parallelized if the tar members are not aligned with the lzip members. Tar +archives compressed with plzip can't be decoded in parallel because tar and +plzip do not have a way to align both sets of members. Certainly one can +decompress one such archive with a multi-threaded tool like plzip, but the +increase in speed is not as large as it could be because plzip must +serialize the decompressed data and pass them to tar, which decodes them +sequentially, one tar member at a time. + +On the other hand, if the tar.lz archive is created with a tool like tarlz, +which can guarantee the alignment between tar members and lzip members +because it controls both archiving and compression, then the lzip format +becomes an indexed layer on top of the tar archive which makes possible +decoding it safely in parallel. + +Tarlz is able to automatically decode aligned and unaligned multimember +tar.lz archives, keeping backwards compatibility. If tarlz finds a member +misalignment during multi-threaded decoding, it switches to single-threaded +mode and continues decoding the archive. + +If the files in the archive are large, multi-threaded @samp{--list} on a +regular (seekable) tar.lz archive can be hundreds of times faster than +sequential @samp{--list} because, in addition to using several processors, +it only needs to decompress part of each lzip member. See the following +example listing the Silesia corpus on a dual core machine: + +@example +tarlz -9 --no-solid -cf silesia.tar.lz silesia +time lzip -cd silesia.tar.lz | tar -tf - (5.032s) +time plzip -cd silesia.tar.lz | tar -tf - (3.256s) +time tarlz -tf silesia.tar.lz (0.020s) +@end example + +On the other hand, multi-threaded @samp{--list} won't detect corruption in +the tar member data because it only decodes the part of each lzip member +corresponding to the tar member header. This is another reason why the tar +headers must provide their own integrity checking. + +@sp 1 +@anchor{mt-extraction} +@section Limitations of multi-threaded extraction + +Multi-threaded extraction may produce different output than single-threaded +extraction in some cases: + +During multi-threaded extraction, several independent threads are +simultaneously reading the archive and creating files in the file system. +The archive is not read sequentially. As a consequence, any error or +weirdness in the archive (like a corrupt member or an end-of-archive block +in the middle of the archive) won't be usually detected until part of the +archive beyond that point has been processed. + +If the archive contains two or more tar members with the same name, +single-threaded extraction extracts the members in the order they appear in +the archive and leaves in the file system the last version of the file. But +multi-threaded extraction may extract the members in any order and leave in +the file system any version of the file nondeterministically. It is +unspecified which of the tar members is extracted. + +If the same file is extracted through several paths (different member names +resolve to the same file in the file system), the result is undefined. +(Probably the resulting file will be mangled). + +Extraction of a hard link may fail if it is extracted before the file it +links to. + + +@node Minimum archive sizes +@chapter Minimum archive sizes required for multi-threaded block compression +@cindex minimum archive sizes + +When creating or appending to a compressed archive using multi-threaded +block compression, tarlz puts tar members together in blocks and compresses +as many blocks simultaneously as worker threads are chosen, creating a +multimember compressed archive. + +For this to work as expected (and roughly multiply the compression speed by +the number of available processors), the uncompressed archive must be at +least as large as the number of worker threads times the block size +(@pxref{--data-size}). Else some processors will not get any data to +compress, and compression will be proportionally slower. The maximum speed +increase achievable on a given archive is limited by the ratio +@w{(uncompressed_size / data_size)}. For example, a tarball the size of gcc +or linux will scale up to 10 or 14 processors at level -9. + +The following table shows the minimum uncompressed archive size needed for +full use of N processors at a given compression level, using the default +data size for each level: + +@multitable {Processors} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB} +@headitem Processors @tab 2 @tab 4 @tab 8 @tab 16 @tab 64 @tab 256 +@item Level +@item -0 @tab 2 MiB @tab 4 MiB @tab 8 MiB @tab 16 MiB @tab 64 MiB @tab 256 MiB +@item -1 @tab 4 MiB @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 128 MiB @tab 512 MiB +@item -2 @tab 6 MiB @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 192 MiB @tab 768 MiB +@item -3 @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 256 MiB @tab 1 GiB +@item -4 @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 96 MiB @tab 384 MiB @tab 1.5 GiB +@item -5 @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 512 MiB @tab 2 GiB +@item -6 @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 1 GiB @tab 4 GiB +@item -7 @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 2 GiB @tab 8 GiB +@item -8 @tab 96 MiB @tab 192 MiB @tab 384 MiB @tab 768 MiB @tab 3 GiB @tab 12 GiB +@item -9 @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 1 GiB @tab 4 GiB @tab 16 GiB +@end multitable + + +@node Examples +@chapter A small tutorial with examples +@cindex examples + +@noindent +Example 1: Create a multimember compressed archive @samp{archive.tar.lz} +containing files @samp{a}, @samp{b} and @samp{c}. + +@example +tarlz -cf archive.tar.lz a b c +@end example + +@sp 1 +@noindent +Example 2: Append files @samp{d} and @samp{e} to the multimember compressed +archive @samp{archive.tar.lz}. + +@example +tarlz -rf archive.tar.lz d e +@end example + +@sp 1 +@noindent +Example 3: Create a solidly compressed appendable archive +@samp{archive.tar.lz} containing files @samp{a}, @samp{b} and @samp{c}. +Then append files @samp{d} and @samp{e} to the archive. + +@example +tarlz --asolid -cf archive.tar.lz a b c +tarlz --asolid -rf archive.tar.lz d e +@end example + +@sp 1 +@noindent +Example 4: Create a compressed appendable archive containing directories +@samp{dir1}, @samp{dir2} and @samp{dir3} with a separate lzip member per +directory. Then append files @samp{a}, @samp{b}, @samp{c}, @samp{d} and +@samp{e} to the archive, all of them contained in a single lzip member. +The resulting archive @samp{archive.tar.lz} contains 5 lzip members +(including the end-of-archive member). + +@example +tarlz --dsolid -cf archive.tar.lz dir1 dir2 dir3 +tarlz --asolid -rf archive.tar.lz a b c d e +@end example + +@sp 1 +@noindent +Example 5: Create a solidly compressed archive @samp{archive.tar.lz} +containing files @samp{a}, @samp{b} and @samp{c}. Note that no more +files can be later appended to the archive. + +@example +tarlz --solid -cf archive.tar.lz a b c +@end example + +@sp 1 +@noindent +Example 6: Extract all files from archive @samp{archive.tar.lz}. + +@example +tarlz -xf archive.tar.lz +@end example + +@sp 1 +@noindent +Example 7: Extract files @samp{a} and @samp{c}, and the whole tree under +directory @samp{dir1} from archive @samp{archive.tar.lz}. + +@example +tarlz -xf archive.tar.lz a c dir1 +@end example + +@sp 1 +@noindent +Example 8: Copy the contents of directory @samp{sourcedir} to the directory +@samp{destdir}. + +@example +tarlz -C sourcedir --uncompressed -cf - . | tarlz -C destdir -xf - +@end example + +@sp 1 +@noindent +Example 9: Compress the existing POSIX archive @samp{archive.tar} and write +the output to @samp{archive.tar.lz}. Compress each member individually for +maximum availability. (If one member in the compressed archive gets damaged, +the other members can still be extracted). + +@example +tarlz -z --no-solid archive.tar +@end example + +@sp 1 +@noindent +Example 10: Compress the archive @samp{archive.tar} and write the output to +@samp{foo.tar.lz}. + +@example +tarlz -z -o foo.tar.lz archive.tar +@end example + +@sp 1 +@noindent +Example 11: Concatenate and compress two archives @samp{archive1.tar} and +@samp{archive2.tar}, and write the output to @samp{foo.tar.lz}. + +@example +tarlz -A archive1.tar archive2.tar | tarlz -z -o foo.tar.lz +@end example + + +@node Problems +@chapter Reporting bugs +@cindex bugs +@cindex getting help + +There are probably bugs in tarlz. There are certainly errors and +omissions in this manual. If you report them, they will get fixed. If +you don't, no one will ever know about them and they will remain unfixed +for all eternity, if not longer. + +If you find a bug in tarlz, please send electronic mail to +@email{lzip-bug@@nongnu.org}. Include the version number, which you can +find by running @w{@samp{tarlz --version}} and +@w{@samp{tarlz -v --check-lib}}. + + +@node Concept index +@unnumbered Concept index + +@printindex cp + +@bye diff --git a/exclude.cc b/exclude.cc new file mode 100644 index 0000000..99491ca --- /dev/null +++ b/exclude.cc @@ -0,0 +1,54 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include + +#include "tarlz.h" + + +namespace Exclude { + +std::vector< std::string > patterns; // list of patterns + +} // end namespace Exclude + + +void Exclude::add_pattern( const std::string & arg ) + { patterns.push_back( arg ); } + + +bool Exclude::excluded( const char * const filename ) + { + if( patterns.empty() ) return false; + const char * p = filename; + do { + for( unsigned i = 0; i < patterns.size(); ++i ) + // ignore a trailing sequence starting with '/' in filename +#ifdef FNM_LEADING_DIR + if( fnmatch( patterns[i].c_str(), p, FNM_LEADING_DIR ) == 0 ) return true; +#else + if( fnmatch( patterns[i].c_str(), p, 0 ) == 0 || + fnmatch( ( patterns[i] + "/*" ).c_str(), p, 0 ) == 0 ) return true; +#endif + while( *p && *p != '/' ) ++p; // skip component + while( *p == '/' ) ++p; // skip slashes + } while( *p ); + return false; + } diff --git a/extended.cc b/extended.cc new file mode 100644 index 0000000..f05d15f --- /dev/null +++ b/extended.cc @@ -0,0 +1,415 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include + +#include "tarlz.h" + + +const CRC32 crc32c( true ); + + +namespace { + +unsigned long long record_size( const unsigned keyword_size, + const unsigned long value_size ) + { + /* length + ' ' + keyword + '=' + value + '\n' + minimize length; prefer "99<97_bytes>" to "100<97_bytes>" */ + unsigned long long size = 1 + keyword_size + 1 + value_size + 1; + size += decimal_digits( decimal_digits( size ) + size ); + return size; + } + + +unsigned long long parse_decimal( const char * const ptr, + const char ** const tailp, + const unsigned long long size ) + { + unsigned long long result = 0; + unsigned long long i = 0; + while( i < size && std::isspace( (unsigned char)ptr[i] ) ) ++i; + if( !std::isdigit( (unsigned char)ptr[i] ) ) + { if( tailp ) *tailp = ptr; return 0; } + for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i ) + { + const unsigned long long prev = result; + result *= 10; result += ptr[i] - '0'; + if( result < prev || result > LLONG_MAX ) // overflow + { if( tailp ) *tailp = ptr; return 0; } + } + if( tailp ) *tailp = ptr + i; + return result; + } + + +uint32_t parse_record_crc( const char * const ptr ) + { + uint32_t crc = 0; + for( int i = 0; i < 8; ++i ) + { + crc <<= 4; + if( ptr[i] >= '0' && ptr[i] <= '9' ) crc += ptr[i] - '0'; + else if( ptr[i] >= 'A' && ptr[i] <= 'F' ) crc += ptr[i] + 10 - 'A'; + else if( ptr[i] >= 'a' && ptr[i] <= 'f' ) crc += ptr[i] + 10 - 'a'; + else { crc = 0; break; } // invalid digit in crc string + } + return crc; + } + + +unsigned char xdigit( const unsigned value ) + { + if( value <= 9 ) return '0' + value; + if( value <= 15 ) return 'A' + value - 10; + return 0; + } + +void print_hex( char * const buf, int size, unsigned long long num ) + { while( --size >= 0 ) { buf[size] = xdigit( num & 0x0F ); num >>= 4; } } + +void print_decimal( char * const buf, int size, unsigned long long num ) + { while( --size >= 0 ) { buf[size] = num % 10 + '0'; num /= 10; } } + +unsigned long long print_size_keyword( char * const buf, + const unsigned long long size, const char * keyword ) + { + // "size keyword=value\n" + unsigned long long pos = decimal_digits( size ); + print_decimal( buf, pos, size ); buf[pos++] = ' '; + while( *keyword ) { buf[pos++] = *keyword; ++keyword; } buf[pos++] = '='; + return pos; + } + +bool print_record( char * const buf, const unsigned long long size, + const char * keyword, const std::string & value ) + { + unsigned long long pos = print_size_keyword( buf, size, keyword ); + std::memcpy( buf + pos, value.c_str(), value.size() ); + pos += value.size(); buf[pos++] = '\n'; + return pos == size; + } + +bool print_record( char * const buf, const int size, + const char * keyword, const unsigned long long value ) + { + int pos = print_size_keyword( buf, size, keyword ); + const int vd = decimal_digits( value ); + print_decimal( buf + pos, vd, value ); pos += vd; buf[pos++] = '\n'; + return pos == size; + } + +bool print_record( char * const buf, const int size, + const char * keyword, const Etime & value ) + { + int pos = print_size_keyword( buf, size, keyword ); + pos += value.print( buf + pos ); buf[pos++] = '\n'; + return pos == size; + } + +} // end namespace + + +unsigned Etime::decimal_size() const + { + unsigned size = 1 + ( sec_ < 0 ); // first digit + negative sign + for( long long n = sec_; n >= 10 || n <= -10; n /= 10 ) ++size; + if( nsec_ > 0 && nsec_ <= 999999999 ) + { size += 2; // decimal point + first fractional digit + for( int n = nsec_; n >= 10; n /= 10 ) ++size; } + return size; + } + +unsigned Etime::print( char * const buf ) const + { + int len = 0; + if( nsec_ > 0 && nsec_ <= 999999999 ) + { for( int n = nsec_; n > 0; n /= 10 ) buf[len++] = n % 10 + '0'; + buf[len++] = '.'; } + long long n = sec_; + do { long long on = n; n /= 10; buf[len++] = llabs( on - 10 * n ) + '0'; } + while( n != 0 ); + if( sec_ < 0 ) buf[len++] = '-'; + for( int i = 0; i < len / 2; ++i ) std::swap( buf[i], buf[len-i-1] ); + return len; + } + +bool Etime::parse( const char * const ptr, const char ** const tailp, + const long long size ) + { + char * tail; + errno = 0; + long long s = strtoll( ptr, &tail, 10 ); + if( tail == ptr || errno || + ( *tail != 0 && *tail != '\n' && *tail != '.' ) ) return false; + int ns = 0; + if( *tail == '.' ) // parse nanoseconds and any extra digits + { + ++tail; + if( tail - ptr >= size || !std::isdigit( (unsigned char)*tail ) ) + return false; + for( int factor = 100000000; + tail - ptr < size && std::isdigit( (unsigned char)*tail ); + ++tail, factor /= 10 ) + ns += factor * ( *tail - '0' ); + } + sec_ = s; nsec_ = ns; if( tailp ) *tailp = tail; + return true; + } + + +std::vector< std::string > Extended::unknown_keywords; +const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" ); + +void Extended::calculate_sizes() const + { + linkpath_recsize_ = linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0; + path_recsize_ = path_.size() ? record_size( 4, path_.size() ) : 0; + file_size_recsize_ = + ( file_size_ > 0 ) ? record_size( 4, decimal_digits( file_size_ ) ) : 0; + uid_recsize_ = ( uid_ >= 0 ) ? record_size( 3, decimal_digits( uid_ ) ) : 0; + gid_recsize_ = ( gid_ >= 0 ) ? record_size( 3, decimal_digits( gid_ ) ) : 0; + atime_recsize_ = + atime_.out_of_ustar_range() ? record_size( 5, atime_.decimal_size() ) : 0; + mtime_recsize_ = + mtime_.out_of_ustar_range() ? record_size( 5, mtime_.decimal_size() ) : 0; + edsize_ = linkpath_recsize_ + path_recsize_ + file_size_recsize_ + + uid_recsize_ + gid_recsize_ + atime_recsize_ + mtime_recsize_ + + crc_record.size(); + padded_edsize_ = round_up( edsize_ ); + full_size_ = header_size + padded_edsize_; + } + + +// print a diagnostic for each unknown keyword once per keyword +void Extended::unknown_keyword( const char * const buf, + const unsigned long long size ) const + { + unsigned long long eq_pos = 0; // position of '=' in buf + while( eq_pos < size && buf[eq_pos] != '=' ) ++eq_pos; + const std::string keyword( buf, eq_pos ); + for( unsigned i = 0; i < unknown_keywords.size(); ++i ) + if( keyword == unknown_keywords[i] ) return; + unknown_keywords.push_back( keyword ); + print_error( 0, "Ignoring unknown extended header keyword '%s'", + keyword.c_str() ); + } + + +// Return the size of the extended block, -1 if error, -2 if out of memory. +long long Extended::format_block( Resizable_buffer & rbuf ) const + { + if( empty() ) return 0; // no extended data + const unsigned long long bufsize = full_size(); // recalculate sizes + if( edsize_ <= 0 ) return 0; // no extended data + if( edsize_ >= 1LL << 33 ) return -1; // too much extended data + if( !rbuf.resize( bufsize ) ) return -2; // extended block buffer + uint8_t * const header = rbuf.u8(); // extended header + char * const buf = rbuf() + header_size; // extended records + init_tar_header( header ); + header[typeflag_o] = tf_extended; // fill only required fields + print_octal( header + size_o, size_l - 1, edsize_ ); + print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) ); + + if( path_recsize_ && !print_record( buf, path_recsize_, "path", path_ ) ) + return -1; + long long pos = path_recsize_; + if( linkpath_recsize_ && + !print_record( buf + pos, linkpath_recsize_, "linkpath", linkpath_ ) ) + return -1; + pos += linkpath_recsize_; + if( file_size_recsize_ && + !print_record( buf + pos, file_size_recsize_, "size", file_size_ ) ) + return -1; + pos += file_size_recsize_; + if( uid_recsize_ && !print_record( buf + pos, uid_recsize_, "uid", uid_ ) ) + return -1; + pos += uid_recsize_; + if( gid_recsize_ && !print_record( buf + pos, gid_recsize_, "gid", gid_ ) ) + return -1; + pos += gid_recsize_; + if( atime_recsize_ && + !print_record( buf + pos, atime_recsize_, "atime", atime_ ) ) + return -1; + pos += atime_recsize_; + if( mtime_recsize_ && + !print_record( buf + pos, mtime_recsize_, "mtime", mtime_ ) ) + return -1; + pos += mtime_recsize_; + const unsigned crc_size = Extended::crc_record.size(); + std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size ); + pos += crc_size; + if( pos != edsize_ ) return -1; + print_hex( buf + edsize_ - 9, 8, + crc32c.windowed_crc( (const uint8_t *)buf, edsize_ - 9, edsize_ ) ); + if( padded_edsize_ > edsize_ ) // set padding to zero + std::memset( buf + edsize_, 0, padded_edsize_ - edsize_ ); + crc_present_ = true; + return bufsize; + } + + +bool Extended::parse( const char * const buf, const unsigned long long edsize, + const bool permissive ) + { + reset(); full_size_ = -1; // invalidate cached sizes + for( unsigned long long pos = 0; pos < edsize; ) // parse records + { + const char * tail; + const unsigned long long rsize = + parse_decimal( buf + pos, &tail, edsize - pos ); + if( rsize == 0 || rsize > edsize - pos || + tail[0] != ' ' || buf[pos+rsize-1] != '\n' ) return false; + ++tail; // point to keyword + // rest = length of (keyword + '=' + value) without the final newline + const unsigned long long rest = ( buf + ( pos + rsize - 1 ) ) - tail; + if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 ) + { + if( path_.size() && !permissive ) return false; + unsigned long long len = rest - 5; + while( len > 1 && tail[5+len-1] == '/' ) --len; // trailing '/' + path_.assign( tail + 5, len ); + // this also truncates path_ at the first embedded null character + path_.assign( remove_leading_dotslash( path_.c_str(), &removed_prefix ) ); + } + else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 ) + { + if( linkpath_.size() && !permissive ) return false; + unsigned long long len = rest - 9; + while( len > 1 && tail[9+len-1] == '/' ) --len; // trailing '/' + linkpath_.assign( tail + 9, len ); + } + else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 ) + { + if( file_size_ != 0 && !permissive ) return false; + file_size_ = parse_decimal( tail + 5, &tail, rest - 5 ); + // parse error or size fits in ustar header + if( file_size_ < 1LL << 33 || file_size_ > max_file_size || + tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 4 && std::memcmp( tail, "uid=", 4 ) == 0 ) + { + if( uid_ >= 0 && !permissive ) return false; + uid_ = parse_decimal( tail + 4, &tail, rest - 4 ); + // parse error or uid fits in ustar header + if( uid_ < 1 << 21 || tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 4 && std::memcmp( tail, "gid=", 4 ) == 0 ) + { + if( gid_ >= 0 && !permissive ) return false; + gid_ = parse_decimal( tail + 4, &tail, rest - 4 ); + // parse error or gid fits in ustar header + if( gid_ < 1 << 21 || tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 6 && std::memcmp( tail, "atime=", 6 ) == 0 ) + { + if( atime_.isvalid() && !permissive ) return false; + if( !atime_.parse( tail + 6, &tail, rest - 6 ) || // parse error + tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 6 && std::memcmp( tail, "mtime=", 6 ) == 0 ) + { + if( mtime_.isvalid() && !permissive ) return false; + if( !mtime_.parse( tail + 6, &tail, rest - 6 ) || // parse error + tail != buf + ( pos + rsize - 1 ) ) return false; + } + else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 ) + { + if( crc_present_ && !permissive ) return false; + if( rsize != crc_record.size() ) return false; + crc_present_ = true; + const uint32_t stored_crc = parse_record_crc( tail + 10 ); + const uint32_t computed_crc = + crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize ); + if( stored_crc != computed_crc ) + { + if( verbosity >= 2 ) + std::fprintf( stderr, "CRC32C = %08X\n", (unsigned)computed_crc ); + return false; + } + } + else if( ( rest < 8 || std::memcmp( tail, "comment=", 8 ) != 0 ) && + verbosity >= 1 ) unknown_keyword( tail, rest ); + pos += rsize; + } + return true; + } + + +/* If not already initialized, copy linkpath, path, file_size, uid, gid, + atime, and mtime from ustar header. */ +void Extended::fill_from_ustar( const Tar_header header ) + { + if( linkpath_.empty() ) // copy linkpath from ustar header + { + int len = 0; + while( len < linkname_l && header[linkname_o+len] ) ++len; + while( len > 1 && header[linkname_o+len-1] == '/' ) --len; // trailing '/' + if( len > 0 ) + { + linkpath_.assign( (const char *)header + linkname_o, len ); + full_size_ = -1; + } + } + + if( path_.empty() ) // copy path from ustar header + { // the entire path may be in prefix + char stored_name[prefix_l+1+name_l+1]; + int len = 0; + while( len < prefix_l && header[prefix_o+len] ) + { stored_name[len] = header[prefix_o+len]; ++len; } + if( len && header[name_o] ) stored_name[len++] = '/'; + for( int i = 0; i < name_l && header[name_o+i]; ++i ) + { stored_name[len] = header[name_o+i]; ++len; } + while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/' + stored_name[len] = 0; + path( remove_leading_dotslash( stored_name, &removed_prefix ) ); + } + + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( file_size_ == 0 && // copy file_size from ustar header + ( typeflag == tf_regular || typeflag == tf_hiperf ) ) + file_size( parse_octal( header + size_o, size_l ) ); + if( uid_ < 0 ) uid_ = parse_octal( header + uid_o, uid_l ); + if( gid_ < 0 ) gid_ = parse_octal( header + gid_o, gid_l ); + if( !atime_.isvalid() ) + atime_.set( parse_octal( header + mtime_o, mtime_l ) ); // 33 bits + if( !mtime_.isvalid() ) + mtime_.set( parse_octal( header + mtime_o, mtime_l ) ); // 33 bits + } + + +/* Return file size from record or from ustar header, and reset file_size_. + Used for fast parsing of headers in uncompressed archives. +*/ +long long Extended::get_file_size_and_reset( const Tar_header header ) + { + const long long tmp = file_size_; + file_size( 0 ); // reset full_size_ + const Typeflag typeflag = (Typeflag)header[typeflag_o]; + if( typeflag == tf_regular || typeflag == tf_hiperf ) + { + if( tmp == 0 ) return parse_octal( header + size_o, size_l ); + else return tmp; + } + return 0; + } diff --git a/lzip_index.cc b/lzip_index.cc new file mode 100644 index 0000000..b886d2b --- /dev/null +++ b/lzip_index.cc @@ -0,0 +1,221 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include + +#include "tarlz.h" +#include "lzip_index.h" + + +int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ) + { + if( lseek( fd, pos, SEEK_SET ) == pos ) + return readblock( fd, buf, size ); + return 0; + } + +namespace { + +const char * bad_version( const unsigned version ) + { + static char buf[80]; + snprintf( buf, sizeof buf, "Version %u member format not supported.", + version ); + return buf; + } + +} // end namespace + + +bool Lzip_index::check_header_error( const Lzip_header & header, + const bool first ) + { + if( !header.verify_magic() ) + { error_ = bad_magic_msg; retval_ = 2; if( first ) bad_magic_ = true; + return true; } + if( !header.verify_version() ) + { error_ = bad_version( header.version() ); retval_ = 2; return true; } + if( !isvalid_ds( header.dictionary_size() ) ) + { error_ = bad_dict_msg; retval_ = 2; return true; } + return false; + } + +void Lzip_index::set_errno_error( const char * const msg ) + { + error_ = msg; error_ += std::strerror( errno ); + retval_ = 1; + } + +void Lzip_index::set_num_error( const char * const msg, unsigned long long num ) + { + char buf[80]; + snprintf( buf, sizeof buf, "%s%llu", msg, num ); + error_ = buf; + retval_ = 2; + } + + +bool Lzip_index::read_header( const int fd, Lzip_header & header, + const long long pos ) + { + if( seek_read( fd, header.data, Lzip_header::size, pos ) != Lzip_header::size ) + { set_errno_error( "Error reading member header: " ); return false; } + return true; + } + + +// If successful, push last member and set pos to member header. +bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos, + const bool ignore_trailing, + const bool loose_trailing ) + { + if( pos < min_member_size ) return false; + enum { block_size = 16384, + buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size }; + uint8_t buffer[buffer_size]; + int bsize = pos % block_size; // total bytes in buffer + if( bsize <= buffer_size - block_size ) bsize += block_size; + int search_size = bsize; // bytes to search for trailer + int rd_size = bsize; // bytes to read from file + unsigned long long ipos = pos - rd_size; // aligned to block_size + + while( true ) + { + if( seek_read( fd, buffer, rd_size, ipos ) != rd_size ) + { set_errno_error( "Error seeking member trailer: " ); return false; } + const uint8_t max_msb = ( ipos + search_size ) >> 56; + for( int i = search_size; i >= Lzip_trailer::size; --i ) + if( buffer[i-1] <= max_msb ) // most significant byte of member_size + { + const Lzip_trailer & trailer = + *(const Lzip_trailer *)( buffer + i - Lzip_trailer::size ); + const unsigned long long member_size = trailer.member_size(); + if( member_size == 0 ) // skip trailing zeros + { while( i > Lzip_trailer::size && buffer[i-9] == 0 ) --i; continue; } + if( member_size > ipos + i || !trailer.verify_consistency() ) + continue; + Lzip_header header; + if( !read_header( fd, header, ipos + i - member_size ) ) return false; + if( !header.verify() ) continue; + const Lzip_header & header2 = *(const Lzip_header *)( buffer + i ); + const bool full_h2 = bsize - i >= Lzip_header::size; + if( header2.verify_prefix( bsize - i ) ) // last member + { + if( !full_h2 ) error_ = "Last member in input file is truncated."; + else if( !check_header_error( header2, false ) ) + error_ = "Last member in input file is truncated or corrupt."; + retval_ = 2; return false; + } + if( !loose_trailing && full_h2 && header2.verify_corrupt() ) + { error_ = corrupt_mm_msg; retval_ = 2; return false; } + if( !ignore_trailing ) + { error_ = trailing_msg; retval_ = 2; return false; } + pos = ipos + i - member_size; + const unsigned dictionary_size = header.dictionary_size(); + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, dictionary_size ) ); + if( dictionary_size_ < dictionary_size ) + dictionary_size_ = dictionary_size; + return true; + } + if( ipos == 0 ) + { set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size ); + return false; } + bsize = buffer_size; + search_size = bsize - Lzip_header::size; + rd_size = block_size; + ipos -= rd_size; + std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size ); + } + } + + +Lzip_index::Lzip_index( const int infd, const bool ignore_trailing, + const bool loose_trailing ) + : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), dictionary_size_( 0 ), + bad_magic_( false ) + { + if( insize < 0 ) + { set_errno_error( "Input file is not seekable: " ); return; } + if( insize < min_member_size ) + { error_ = "Input file is too short."; retval_ = 2; return; } + if( insize > INT64_MAX ) + { error_ = "Input file is too long (2^63 bytes or more)."; + retval_ = 2; return; } + + Lzip_header header; + if( !read_header( infd, header, 0 ) ) return; + if( check_header_error( header, true ) ) return; + + unsigned long long pos = insize; // always points to a header or to EOF + while( pos >= min_member_size ) + { + Lzip_trailer trailer; + if( seek_read( infd, trailer.data, Lzip_trailer::size, + pos - Lzip_trailer::size ) != Lzip_trailer::size ) + { set_errno_error( "Error reading member trailer: " ); break; } + const unsigned long long member_size = trailer.member_size(); + if( member_size > pos || !trailer.verify_consistency() ) // bad trailer + { + if( member_vector.empty() ) + { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) ) + continue; else return; } + set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size ); + break; + } + if( !read_header( infd, header, pos - member_size ) ) break; + if( !header.verify() ) // bad header + { + if( member_vector.empty() ) + { if( skip_trailing_data( infd, pos, ignore_trailing, loose_trailing ) ) + continue; else return; } + set_num_error( "Bad header at pos ", pos - member_size ); + break; + } + pos -= member_size; + const unsigned dictionary_size = header.dictionary_size(); + member_vector.push_back( Member( 0, trailer.data_size(), pos, + member_size, dictionary_size ) ); + if( dictionary_size_ < dictionary_size ) + dictionary_size_ = dictionary_size; + } + if( pos != 0 || member_vector.empty() ) + { + member_vector.clear(); + if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; } + return; + } + std::reverse( member_vector.begin(), member_vector.end() ); + for( unsigned long i = 0; ; ++i ) + { + const long long end = member_vector[i].dblock.end(); + if( end < 0 || end > INT64_MAX ) + { + member_vector.clear(); + error_ = "Data in input file is too long (2^63 bytes or more)."; + retval_ = 2; return; + } + if( i + 1 >= member_vector.size() ) break; + member_vector[i+1].dblock.pos( end ); + } + } diff --git a/lzip_index.h b/lzip_index.h new file mode 100644 index 0000000..af8aaa4 --- /dev/null +++ b/lzip_index.h @@ -0,0 +1,93 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef INT64_MAX +#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL +#endif + + +class Block + { + long long pos_, size_; // pos + size <= INT64_MAX + +public: + Block( const long long p, const long long s ) : pos_( p ), size_( s ) {} + + long long pos() const { return pos_; } + long long size() const { return size_; } + long long end() const { return pos_ + size_; } + + void pos( const long long p ) { pos_ = p; } + void size( const long long s ) { size_ = s; } + }; + + +class Lzip_index + { + struct Member + { + Block dblock, mblock; // data block, member block + unsigned dictionary_size; + + Member( const long long dp, const long long ds, + const long long mp, const long long ms, const unsigned dict_size ) + : dblock( dp, ds ), mblock( mp, ms ), dictionary_size( dict_size ) {} + }; + + std::vector< Member > member_vector; + std::string error_; + const long long insize; + int retval_; + unsigned dictionary_size_; // largest dictionary size in the file + bool bad_magic_; // bad magic in first header + + bool check_header_error( const Lzip_header & header, const bool first ); + void set_errno_error( const char * const msg ); + void set_num_error( const char * const msg, unsigned long long num ); + bool read_header( const int fd, Lzip_header & header, const long long pos ); + bool skip_trailing_data( const int fd, unsigned long long & pos, + const bool ignore_trailing, const bool loose_trailing ); + +public: + Lzip_index( const int infd, const bool ignore_trailing, + const bool loose_trailing ); + + long members() const { return member_vector.size(); } + const std::string & error() const { return error_; } + int retval() const { return retval_; } + unsigned dictionary_size() const { return dictionary_size_; } + bool bad_magic() const { return bad_magic_; } + + long long udata_size() const + { if( member_vector.empty() ) return 0; + return member_vector.back().dblock.end(); } + + long long cdata_size() const + { if( member_vector.empty() ) return 0; + return member_vector.back().mblock.end(); } + + // total size including trailing data (if any) + long long file_size() const + { if( insize >= 0 ) return insize; else return 0; } + + const Block & dblock( const long i ) const + { return member_vector[i].dblock; } + const Block & mblock( const long i ) const + { return member_vector[i].mblock; } + unsigned dictionary_size( const long i ) const + { return member_vector[i].dictionary_size; } + }; diff --git a/main.cc b/main.cc new file mode 100644 index 0000000..5a85ebb --- /dev/null +++ b/main.cc @@ -0,0 +1,723 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +/* + Exit status: 0 for a normal exit, 1 for environmental problems + (file not found, files differ, invalid command line options, I/O errors, + etc), 2 to indicate a corrupt or invalid input file, 3 for an internal + consistency error (e.g., bug) which caused tarlz to panic. +*/ + +#define _FILE_OFFSET_BITS 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include // for lzlib.h +#include +#include +#include +#include +#include +#if defined __OS2__ +#include +#endif + +#include "tarlz.h" +#include "arg_parser.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#if CHAR_BIT != 8 +#error "Environments where CHAR_BIT != 8 are not supported." +#endif + +int verbosity = 0; +const char * const program_name = "tarlz"; + +namespace { + +const char * const program_year = "2022"; +const char * invocation_name = program_name; // default value + + +void show_help( const long num_online ) + { + std::printf( "Tarlz is a massively parallel (multi-threaded) combined implementation of\n" + "the tar archiver and the lzip compressor. Tarlz uses the compression library\n" + "lzlib.\n" + "\nTarlz creates, lists, and extracts archives in a simplified and safer\n" + "variant of the POSIX pax format compressed in lzip format, keeping the\n" + "alignment between tar members and lzip members. The resulting multimember\n" + "tar.lz archive is fully backward compatible with standard tar tools like GNU\n" + "tar, which treat it like any other tar.lz archive. Tarlz can append files to\n" + "the end of such compressed archives.\n" + "\nKeeping the alignment between tar members and lzip members has two\n" + "advantages. It adds an indexed lzip layer on top of the tar archive, making\n" + "it possible to decode the archive safely in parallel. It also minimizes the\n" + "amount of data lost in case of corruption.\n" + "\nThe tarlz file format is a safe POSIX-style backup format. In case of\n" + "corruption, tarlz can extract all the undamaged members from the tar.lz\n" + "archive, skipping over the damaged members, just like the standard\n" + "(uncompressed) tar. Moreover, the option '--keep-damaged' can be used to\n" + "recover as much data as possible from each damaged member, and lziprecover\n" + "can be used to recover some of the damaged members.\n" + "\nUsage: %s operation [options] [files]\n", invocation_name ); + std::printf( "\nOperations:\n" + " --help display this help and exit\n" + " -V, --version output version information and exit\n" + " -A, --concatenate append archives to the end of an archive\n" + " -c, --create create a new archive\n" + " -d, --diff find differences between archive and file system\n" + " --delete delete files/directories from an archive\n" + " -r, --append append files to the end of an archive\n" + " -t, --list list the contents of an archive\n" + " -x, --extract extract files/directories from an archive\n" + " -z, --compress compress existing POSIX tar archives\n" + " --check-lib check version of lzlib and exit\n" + "\nOptions:\n" + " -B, --data-size= set target size of input data blocks [2x8=16 MiB]\n" + " -C, --directory= change to directory \n" + " -f, --file= use archive file \n" + " -h, --dereference follow symlinks; archive the files they point to\n" + " -n, --threads= set number of (de)compression threads [%ld]\n" + " -o, --output= compress to \n" + " -p, --preserve-permissions don't subtract the umask on extraction\n" + " -q, --quiet suppress all messages\n" + " -v, --verbose verbosely list files processed\n" + " -0 .. -9 set compression level [default 6]\n" + " --uncompressed don't compress the archive created\n" + " --asolid create solidly compressed appendable archive\n" + " --bsolid create per block compressed archive (default)\n" + " --dsolid create per directory compressed archive\n" + " --no-solid create per file compressed archive\n" + " --solid create solidly compressed archive\n" + " --anonymous equivalent to '--owner=root --group=root'\n" + " --owner= use name/ID for files added to archive\n" + " --group= use name/ID for files added to archive\n" + " --exclude= exclude files matching a shell pattern\n" + " --ignore-ids ignore differences in owner and group IDs\n" + " --ignore-overflow ignore mtime overflow differences on 32-bit\n" + " --keep-damaged don't delete partially extracted files\n" + " --missing-crc exit with error status if missing extended CRC\n" + " --mtime= use as mtime for files added to archive\n" + " --out-slots= number of 1 MiB output packets buffered [64]\n" + " --warn-newer warn if any file is newer than the archive\n" +/* " --permissive allow repeated extended headers and records\n"*/, + num_online ); + if( verbosity >= 1 ) + { + std::printf( " --debug= (0-1) print debug statistics to stderr\n" ); + } + std::printf( "\nIf no archive is specified, tarlz tries to read it from standard input or\n" + "write it to standard output.\n" + "\nExit status: 0 for a normal exit, 1 for environmental problems\n" + "(file not found, files differ, invalid command line options, I/O errors,\n" + "etc), 2 to indicate a corrupt or invalid input file, 3 for an internal\n" + "consistency error (e.g., bug) which caused tarlz to panic.\n" + "\nReport bugs to lzip-bug@nongnu.org\n" + "Tarlz home page: http://www.nongnu.org/lzip/tarlz.html\n" ); + } + + +void show_version() + { + std::printf( "%s %s\n", program_name, PROGVERSION ); + std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year ); + std::printf( "Using lzlib %s\n", LZ_version() ); + std::printf( "License GPLv2+: GNU GPL version 2 or later \n" + "This is free software: you are free to change and redistribute it.\n" + "There is NO WARRANTY, to the extent permitted by law.\n" ); + } + + +int check_lzlib_ver() // . or .[a-z.-]* + { +#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 + const unsigned char * p = (unsigned char *)LZ_version_string; + unsigned major = 0, minor = 0; + while( major < 100000 && isdigit( *p ) ) + { major *= 10; major += *p - '0'; ++p; } + if( *p == '.' ) ++p; + else +out: { show_error( "Invalid LZ_version_string in lzlib.h" ); return 2; } + while( minor < 100 && isdigit( *p ) ) + { minor *= 10; minor += *p - '0'; ++p; } + if( *p && *p != '-' && *p != '.' && !std::islower( *p ) ) goto out; + const unsigned version = major * 1000 + minor; + if( LZ_API_VERSION != version ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Version mismatch in lzlib.h: " + "LZ_API_VERSION = %u, should be %u.\n", + program_name, LZ_API_VERSION, version ); + return 2; + } +#endif + return 0; + } + + +int check_lib() + { + int retval = check_lzlib_ver(); + if( std::strcmp( LZ_version_string, LZ_version() ) != 0 ) + { set_retval( retval, 1 ); + if( verbosity >= 0 ) + std::printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n", + LZ_version_string, LZ_version() ); } +#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012 + if( LZ_API_VERSION != LZ_api_version() ) + { set_retval( retval, 1 ); + if( verbosity >= 0 ) + std::printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n", + LZ_API_VERSION, LZ_api_version() ); } +#endif + if( verbosity >= 1 ) + { + std::printf( "Using lzlib %s\n", LZ_version() ); +#if !defined LZ_API_VERSION + std::fputs( "LZ_API_VERSION is not defined.\n", stdout ); +#elif LZ_API_VERSION >= 1012 + std::printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() ); +#else + std::printf( "Compiled with LZ_API_VERSION = %u. " + "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION ); +#endif + } + return retval; + } + + +// separate numbers of 6 or more digits in groups of 3 digits using '_' +const char * format_num3( long long num ) + { + const char * const si_prefix = "kMGTPEZY"; + const char * const binary_prefix = "KMGTPEZY"; + enum { buffers = 8, bufsize = 4 * sizeof num }; + static char buffer[buffers][bufsize]; // circle of static buffers for printf + static int current = 0; + + char * const buf = buffer[current++]; current %= buffers; + char * p = buf + bufsize - 1; // fill the buffer backwards + *p = 0; // terminator + const bool negative = num < 0; + char prefix = 0; // try binary first, then si + for( int i = 0; i < 8 && num != 0 && ( num / 1024 ) * 1024 == num; ++i ) + { num /= 1024; prefix = binary_prefix[i]; } + if( prefix ) *(--p) = 'i'; + else + for( int i = 0; i < 8 && num != 0 && ( num / 1000 ) * 1000 == num; ++i ) + { num /= 1000; prefix = si_prefix[i]; } + if( prefix ) *(--p) = prefix; + const bool split = num >= 100000 || num <= -100000; + + for( int i = 0; ; ) + { + long long onum = num; num /= 10; + *(--p) = llabs( onum - 10 * num ) + '0'; if( num == 0 ) break; + if( split && ++i >= 3 ) { i = 0; *(--p) = '_'; } + } + if( negative ) *(--p) = '-'; + return p; + } + + +void show_option_error( const char * const arg, const char * const msg, + const char * const option_name ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: '%s': %s option '%s'.\n", + program_name, arg, msg, option_name ); + } + + +long long getnum( const char * const arg, const char * const option_name, + const long long llimit = LLONG_MIN, + const long long ulimit = LLONG_MAX ) + { + char * tail; + errno = 0; + long long result = strtoll( arg, &tail, 0 ); + if( tail == arg ) + { show_option_error( arg, "Bad or missing numerical argument in", + option_name ); std::exit( 1 ); } + + if( !errno && tail[0] ) + { + const int factor = ( tail[1] == 'i' ) ? 1024 : 1000; + int exponent = 0; // 0 = bad multiplier + switch( tail[0] ) + { + case 'Y': exponent = 8; break; + case 'Z': exponent = 7; break; + case 'E': exponent = 6; break; + case 'P': exponent = 5; break; + case 'T': exponent = 4; break; + case 'G': exponent = 3; break; + case 'M': exponent = 2; break; + case 'K': if( factor == 1024 ) exponent = 1; break; + case 'k': if( factor == 1000 ) exponent = 1; break; + } + if( exponent <= 0 ) + { show_option_error( arg, "Bad multiplier in numerical argument of", + option_name ); std::exit( 1 ); } + for( int i = 0; i < exponent; ++i ) + { + if( ( result >= 0 && LLONG_MAX / factor >= result ) || + ( result < 0 && LLONG_MIN / factor <= result ) ) result *= factor; + else { errno = ERANGE; break; } + } + } + if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE; + if( errno ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in " + "option '%s'.\n", program_name, arg, format_num3( llimit ), + format_num3( ulimit ), option_name ); + std::exit( 1 ); + } + return result; + } + + +void set_archive_name( std::string & archive_name, const std::string & new_name ) + { + static bool first_call = true; + + if( first_call ) { if( new_name != "-" ) archive_name = new_name; + first_call = false; return; } + show_error( "Only one archive can be specified.", 0, true ); + std::exit( 1 ); + } + + +void set_mode( Program_mode & program_mode, const Program_mode new_mode ) + { + if( program_mode != m_none && program_mode != new_mode ) + { + show_error( "Only one operation can be specified.", 0, true ); + std::exit( 1 ); + } + program_mode = new_mode; + } + + +// parse time as 'long long' even if time_t is 32-bit +long long parse_mtime( const char * arg, const char * const pn ) + { + if( *arg == '@' ) return getnum( arg + 1, pn ); + else if( *arg == '.' || *arg == '/' ) + { + struct stat st; + if( stat( arg, &st ) == 0 ) return st.st_mtime; + show_file_error( arg, "Can't stat mtime reference file", errno ); + std::exit( 1 ); + } + else // format '[-]YYYY-MM-DD[[[HH]:MM]:SS]' + { + long long y; // long long because 2147483648-01-01 overflows int + unsigned mo, d, h, m, s; + char sep; + const int n = std::sscanf( arg, "%lld-%u-%u%c%u:%u:%u", + &y, &mo, &d, &sep, &h, &m, &s ); + if( n >= 3 && n <= 7 && n != 4 && ( n == 3 || sep == ' ' || sep == 'T' ) ) + { + if( y >= INT_MIN + 1900 && y <= INT_MAX && mo >= 1 && mo <= 12 ) + { + struct tm t; + t.tm_year = y - 1900; t.tm_mon = mo - 1; t.tm_mday = d; + t.tm_hour = ( n >= 5 ) ? h : 0; t.tm_min = ( n >= 6 ) ? m : 0; + t.tm_sec = ( n >= 7 ) ? s : 0; t.tm_isdst = -1; + errno = 0; + const long long mtime = std::mktime( &t ); + if( mtime != -1 || errno == 0 ) return mtime; // valid datetime + } + show_option_error( arg, "Date out of limits in", pn ); std::exit( 1 ); + } + } + show_option_error( arg, "Unknown date format in", pn ); std::exit( 1 ); + } + + +long long parse_owner( const char * const arg, const char * const pn ) + { + const struct passwd * const pw = getpwnam( arg ); + if( pw ) return pw->pw_uid; + if( std::isdigit( (unsigned char)arg[0] ) ) + return getnum( arg, pn, 0, LLONG_MAX ); + if( std::strcmp( arg, "root" ) == 0 ) return 0; + show_option_error( arg, "Invalid owner in", pn ); std::exit( 1 ); + } + +long long parse_group( const char * const arg, const char * const pn ) + { + const struct group * const gr = getgrnam( arg ); + if( gr ) return gr->gr_gid; + if( std::isdigit( (unsigned char)arg[0] ) ) + return getnum( arg, pn, 0, LLONG_MAX ); + if( std::strcmp( arg, "root" ) == 0 ) return 0; + show_option_error( arg, "Invalid group in", pn ); std::exit( 1 ); + } + +} // end namespace + + +int hstat( const char * const filename, struct stat * const st, + const bool dereference ) + { return dereference ? stat( filename, st ) : lstat( filename, st ); } + + +int open_instream( const std::string & name ) + { + const int infd = open( name.c_str(), O_RDONLY | O_BINARY ); + if( infd < 0 ) + { show_file_error( name.c_str(), "Can't open for reading", errno ); + return -1; } + struct stat st; // infd must not be a directory + if( fstat( infd, &st ) == 0 && S_ISDIR( st.st_mode ) ) + { show_file_error( name.c_str(), "Can't read. Is a directory." ); + close( infd ); return -1; } + return infd; + } + + +int open_outstream( const std::string & name, const bool create, + Resizable_buffer * const rbufp, const bool force ) + { + const int cflags = O_CREAT | O_WRONLY | ( force ? O_TRUNC : O_EXCL ); + const int flags = ( create ? cflags : O_RDWR ) | O_BINARY; + const mode_t outfd_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; + + const int outfd = open( name.c_str(), flags, outfd_mode ); + if( outfd < 0 ) + { + const char * msg = !create ? "Error opening file" : + ( ( errno == EEXIST ) ? "Skipping file" : "Can't create file" ); + if( !rbufp ) show_file_error( name.c_str(), msg, errno ); + else format_file_error( *rbufp, name.c_str(), msg, errno ); + } + return outfd; + } + + +/* This can be called from any thread, main thread or sub-threads alike, + since they all call common helper functions that call exit_fail_mt() + in case of an error. +*/ +void exit_fail_mt( const int retval ) + { + // calling 'exit' more than once results in undefined behavior + static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + + pthread_mutex_lock( &mutex ); // ignore errors to avoid loop + std::exit( retval ); + } + + +void show_error( const char * const msg, const int errcode, const bool help ) + { + if( verbosity < 0 ) return; + if( msg && msg[0] ) + std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? std::strerror( errcode ) : "" ); + if( help ) + std::fprintf( stderr, "Try '%s --help' for more information.\n", + invocation_name ); + } + + +bool format_error( Resizable_buffer & rbuf, const int errcode, + const char * const format, ... ) + { + if( verbosity < 0 ) { rbuf.resize( 1 ); rbuf()[0] = 0; return false; } + va_list args; + for( int i = 0; i < 2; ++i ) // resize rbuf if not large enough + { + int len = snprintf( rbuf(), rbuf.size(), "%s: ", program_name ); + if( len >= (int)rbuf.size() && !rbuf.resize( len + 1 ) ) break; + va_start( args, format ); + len += vsnprintf( rbuf() + len, rbuf.size() - len, format, args ); + va_end( args ); + if( len >= (int)rbuf.size() && !rbuf.resize( len + 1 ) ) break; + if( errcode <= 0 ) rbuf()[len++] = '\n'; + else len += snprintf( rbuf() + len, rbuf.size() - len, ": %s\n", + std::strerror( errcode ) ); + if( len < (int)rbuf.size() || !rbuf.resize( len + 1 ) ) break; + } + return true; + } + + +void print_error( const int errcode, const char * const format, ... ) + { + if( verbosity < 0 ) return; + va_list args; + std::fprintf( stderr, "%s: ", program_name ); + va_start( args, format ); + std::vfprintf( stderr, format, args ); + va_end( args ); + if( errcode <= 0 ) std::fputc( '\n', stderr ); + else std::fprintf( stderr, ": %s\n", std::strerror( errcode ) ); + } + + +void format_file_error( std::string & estr, const char * const filename, + const char * const msg, const int errcode ) + { + if( verbosity < 0 ) return; + estr += program_name; estr += ": "; estr += filename; estr += ": "; + estr += msg; + if( errcode > 0 ) { estr += ": "; estr += std::strerror( errcode ); } + estr += '\n'; + } + +bool format_file_error( Resizable_buffer & rbuf, const char * const filename, + const char * const msg, const int errcode ) + { + if( verbosity < 0 ) { rbuf.resize( 1 ); rbuf()[0] = 0; return false; } + for( int i = 0; i < 2; ++i ) // resize rbuf if not large enough + { + const int len = snprintf( rbuf(), rbuf.size(), "%s: %s: %s%s%s\n", + program_name, filename, msg, ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? std::strerror( errcode ) : "" ); + if( len < (int)rbuf.size() || !rbuf.resize( len + 1 ) ) break; + } + return true; + } + +void show_file_error( const char * const filename, const char * const msg, + const int errcode ) + { + if( verbosity >= 0 && msg && msg[0] ) + std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg, + ( errcode > 0 ) ? ": " : "", + ( errcode > 0 ) ? std::strerror( errcode ) : "" ); + } + + +void internal_error( const char * const msg ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg ); + std::exit( 3 ); + } + + +int main( const int argc, const char * const argv[] ) + { + if( argc > 0 ) invocation_name = argv[0]; + + enum { opt_ano = 256, opt_aso, opt_bso, opt_chk, opt_crc, opt_dbg, opt_del, + opt_dso, opt_exc, opt_grp, opt_hlp, opt_id, opt_kd, opt_mti, opt_nso, + opt_ofl, opt_out, opt_own, opt_per, opt_sol, opt_un, opt_wn }; + const Arg_parser::Option options[] = + { + { '0', 0, Arg_parser::no }, + { '1', 0, Arg_parser::no }, + { '2', 0, Arg_parser::no }, + { '3', 0, Arg_parser::no }, + { '4', 0, Arg_parser::no }, + { '5', 0, Arg_parser::no }, + { '6', 0, Arg_parser::no }, + { '7', 0, Arg_parser::no }, + { '8', 0, Arg_parser::no }, + { '9', 0, Arg_parser::no }, + { 'A', "concatenate", Arg_parser::no }, + { 'B', "data-size", Arg_parser::yes }, + { 'c', "create", Arg_parser::no }, + { 'C', "directory", Arg_parser::yes }, + { 'd', "diff", Arg_parser::no }, + { 'f', "file", Arg_parser::yes }, + { 'h', "dereference", Arg_parser::no }, + { 'H', "format", Arg_parser::yes }, + { 'n', "threads", Arg_parser::yes }, + { 'o', "output", Arg_parser::yes }, + { 'p', "preserve-permissions", Arg_parser::no }, + { 'q', "quiet", Arg_parser::no }, + { 'r', "append", Arg_parser::no }, + { 't', "list", Arg_parser::no }, + { 'v', "verbose", Arg_parser::no }, + { 'V', "version", Arg_parser::no }, + { 'x', "extract", Arg_parser::no }, + { 'z', "compress", Arg_parser::no }, + { opt_ano, "anonymous", Arg_parser::no }, + { opt_aso, "asolid", Arg_parser::no }, + { opt_bso, "bsolid", Arg_parser::no }, + { opt_chk, "check-lib", Arg_parser::no }, + { opt_dbg, "debug", Arg_parser::yes }, + { opt_del, "delete", Arg_parser::no }, + { opt_dso, "dsolid", Arg_parser::no }, + { opt_exc, "exclude", Arg_parser::yes }, + { opt_grp, "group", Arg_parser::yes }, + { opt_hlp, "help", Arg_parser::no }, + { opt_id, "ignore-ids", Arg_parser::no }, + { opt_kd, "keep-damaged", Arg_parser::no }, + { opt_crc, "missing-crc", Arg_parser::no }, + { opt_mti, "mtime", Arg_parser::yes }, + { opt_nso, "no-solid", Arg_parser::no }, + { opt_ofl, "ignore-overflow", Arg_parser::no }, + { opt_out, "out-slots", Arg_parser::yes }, + { opt_own, "owner", Arg_parser::yes }, + { opt_per, "permissive", Arg_parser::no }, + { opt_sol, "solid", Arg_parser::no }, + { opt_un, "uncompressed", Arg_parser::no }, + { opt_wn, "warn-newer", Arg_parser::no }, + { 0, 0, Arg_parser::no } }; + + const Arg_parser parser( argc, argv, options, true ); // in_order + if( parser.error().size() ) // bad option + { show_error( parser.error().c_str(), 0, true ); return 1; } + Cl_options cl_opts( parser ); + + const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) ); + long max_workers = sysconf( _SC_THREAD_THREADS_MAX ); + if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) ) + max_workers = INT_MAX / sizeof (pthread_t); + + const char * f_pn = 0; + const char * o_pn = 0; + const char * z_pn = 0; + for( int argind = 0; argind < parser.arguments(); ++argind ) + { + const int code = parser.code( argind ); + if( !code ) // skip non-options + { + if( parser.argument( argind ).empty() ) + { show_error( "Empty non-option argument." ); return 1; } + if( parser.argument( argind ) != "-" ) cl_opts.filenames_given = true; + ++cl_opts.num_files; continue; + } + const char * const pn = parser.parsed_name( argind ).c_str(); + const std::string & sarg = parser.argument( argind ); + const char * const arg = sarg.c_str(); + switch( code ) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + cl_opts.level = code - '0'; break; + case 'A': set_mode( cl_opts.program_mode, m_concatenate ); break; + case 'B': cl_opts.data_size = + getnum( arg, pn, min_data_size, max_data_size ); break; + case 'c': set_mode( cl_opts.program_mode, m_create ); break; + case 'C': break; // skip chdir + case 'd': set_mode( cl_opts.program_mode, m_diff ); break; + case 'f': set_archive_name( cl_opts.archive_name, sarg ); f_pn = pn; break; + case 'h': cl_opts.dereference = true; break; + case 'H': break; // ignore format + case 'n': cl_opts.num_workers = getnum( arg, pn, 0, max_workers ); break; + case 'o': cl_opts.output_filename = sarg; o_pn = pn; break; + case 'p': cl_opts.preserve_permissions = true; break; + case 'q': verbosity = -1; break; + case 'r': set_mode( cl_opts.program_mode, m_append ); break; + case 't': set_mode( cl_opts.program_mode, m_list ); break; + case 'v': if( verbosity < 4 ) ++verbosity; break; + case 'V': show_version(); return 0; + case 'x': set_mode( cl_opts.program_mode, m_extract ); break; + case 'z': set_mode( cl_opts.program_mode, m_compress ); z_pn = pn; break; + case opt_ano: cl_opts.uid = parse_owner( "root", pn ); + cl_opts.gid = parse_group( "root", pn ); break; + case opt_aso: cl_opts.solidity = asolid; break; + case opt_bso: cl_opts.solidity = bsolid; break; + case opt_crc: cl_opts.missing_crc = true; break; + case opt_chk: return check_lib(); + case opt_dbg: cl_opts.debug_level = getnum( arg, pn, 0, 3 ); break; + case opt_del: set_mode( cl_opts.program_mode, m_delete ); break; + case opt_dso: cl_opts.solidity = dsolid; break; + case opt_exc: Exclude::add_pattern( sarg ); break; + case opt_grp: cl_opts.gid = parse_group( arg, pn ); break; + case opt_hlp: show_help( num_online ); return 0; + case opt_id: cl_opts.ignore_ids = true; break; + case opt_kd: cl_opts.keep_damaged = true; break; + case opt_mti: cl_opts.mtime = parse_mtime( arg, pn ); + cl_opts.mtime_set = true; break; + case opt_nso: cl_opts.solidity = no_solid; break; + case opt_ofl: cl_opts.ignore_overflow = true; break; + case opt_out: cl_opts.out_slots = getnum( arg, pn, 1, 1024 ); break; + case opt_own: cl_opts.uid = parse_owner( arg, pn ); break; + case opt_per: cl_opts.permissive = true; break; + case opt_sol: cl_opts.solidity = solid; break; + case opt_un: cl_opts.level = -1; break; + case opt_wn: cl_opts.warn_newer = true; break; + default : internal_error( "uncaught option" ); + } + } // end process options + + if( cl_opts.program_mode != m_compress && o_pn ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Option '%s' can only be used with " + "'-z, --compress'.\n", program_name, o_pn ); + return 1; + } + if( cl_opts.program_mode == m_compress && f_pn ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Option '%s' can't be used with '%s'.\n", + program_name, f_pn, z_pn ); + return 1; + } + if( cl_opts.program_mode == m_compress && + ( cl_opts.level < 0 || cl_opts.level > 9 ) ) + { + if( verbosity >= 0 ) + std::fprintf( stderr, "%s: Option '--uncompressed' can't be used with '%s'.\n", + program_name, z_pn ); + return 1; + } + +#if !defined LZ_API_VERSION || LZ_API_VERSION < 1012 // compile-time test +#error "lzlib 1.12 or newer needed." +#endif + if( LZ_api_version() < 1012 ) // runtime test + { show_error( "Wrong library version. At least lzlib 1.12 is required." ); + return 1; } + +#if defined __OS2__ + setmode( STDIN_FILENO, O_BINARY ); + setmode( STDOUT_FILENO, O_BINARY ); +#endif + + if( cl_opts.data_size <= 0 && cl_opts.level >= 0 && cl_opts.level <= 9 ) + { + if( cl_opts.level == 0 ) cl_opts.data_size = 1 << 20; + else cl_opts.data_size = 2 * option_mapping[cl_opts.level].dictionary_size; + } + if( cl_opts.num_workers < 0 ) // 0 disables multi-threading + cl_opts.num_workers = std::min( num_online, max_workers ); + + switch( cl_opts.program_mode ) + { + case m_none: show_error( "Missing operation.", 0, true ); return 1; + case m_append: + case m_create: return encode( cl_opts ); + case m_compress: return compress( cl_opts ); + case m_concatenate: return concatenate( cl_opts ); + case m_delete: return delete_members( cl_opts ); + case m_diff: + case m_extract: + case m_list: return decode( cl_opts ); + } + } diff --git a/tarlz.h b/tarlz.h new file mode 100644 index 0000000..2da81dd --- /dev/null +++ b/tarlz.h @@ -0,0 +1,609 @@ +/* Tarlz - Archiver with multimember lzip compression + Copyright (C) 2013-2022 Antonio Diaz Diaz. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include +#include +#include +#include +#include +#include + +#define max_file_size ( LLONG_MAX - header_size ) +enum { header_size = 512 }; +typedef uint8_t Tar_header[header_size]; + +enum Offsets { + name_o = 0, mode_o = 100, uid_o = 108, gid_o = 116, size_o = 124, + mtime_o = 136, chksum_o = 148, typeflag_o = 156, linkname_o = 157, + magic_o = 257, version_o = 263, uname_o = 265, gname_o = 297, + devmajor_o = 329, devminor_o = 337, prefix_o = 345 }; + +enum Lengths { + name_l = 100, mode_l = 8, uid_l = 8, gid_l = 8, size_l = 12, + mtime_l = 12, chksum_l = 8, typeflag_l = 1, linkname_l = 100, + magic_l = 6, version_l = 2, uname_l = 32, gname_l = 32, + devmajor_l = 8, devminor_l = 8, prefix_l = 155 }; + +enum Typeflag { + tf_regular = '0', tf_link = '1', tf_symlink = '2', tf_chardev = '3', + tf_blockdev = '4', tf_directory = '5', tf_fifo = '6', tf_hiperf = '7', + tf_global = 'g', tf_extended = 'x' }; + +const uint8_t ustar_magic[magic_l] = + { 0x75, 0x73, 0x74, 0x61, 0x72, 0 }; // "ustar\0" + +inline bool verify_ustar_magic( const Tar_header header ) + { return std::memcmp( header + magic_o, ustar_magic, magic_l ) == 0; } + +inline void init_tar_header( Tar_header header ) // set magic and version + { + std::memset( header, 0, header_size ); + std::memcpy( header + magic_o, ustar_magic, magic_l - 1 ); + header[version_o] = header[version_o+1] = '0'; + } + +inline void print_octal( uint8_t * const buf, int size, unsigned long long num ) + { while( --size >= 0 ) { buf[size] = num % 8 + '0'; num /= 8; } } + + +// Round "size" to the next multiple of header size (512). +// +inline unsigned long long round_up( const unsigned long long size ) + { + const int rem = size % header_size; + const int padding = rem ? header_size - rem : 0; + return size + padding; + } + + +inline int decimal_digits( unsigned long long value ) + { + int digits = 1; + while( value >= 10 ) { value /= 10; ++digits; } + return digits; + } + + +inline bool dotdot_at_i( const char * const filename, const int i ) + { + return ( filename[i] == '.' && filename[i+1] == '.' && + ( i == 0 || filename[i-1] == '/' ) && + ( filename[i+2] == 0 || filename[i+2] == '/' ) ); + } + + +inline bool contains_dotdot( const char * const filename ) + { + for( int i = 0; filename[i]; ++i ) + if( dotdot_at_i( filename, i ) ) return true; + return false; + } + + +class Resizable_buffer + { + char * p; + unsigned long size_; // size_ < LONG_MAX + +public: + // must be >= 87 for format_member_name + enum { default_initial_size = 2 * header_size }; + + explicit Resizable_buffer( const unsigned long initial_size = + default_initial_size ) + : p( (char *)std::malloc( initial_size ) ), size_( p ? initial_size : 0 ) {} + ~Resizable_buffer() { if( p ) std::free( p ); p = 0; size_ = 0; } + + bool resize( const unsigned long long new_size ) + { + if( new_size >= LONG_MAX ) return false; + if( size_ < new_size ) + { + char * const tmp = (char *)std::realloc( p, new_size ); + if( !tmp ) return false; + p = tmp; size_ = new_size; + } + return true; + } + char * operator()() { return p; } + const char * operator()() const { return p; } + uint8_t * u8() { return (uint8_t *)p; } + const uint8_t * u8() const { return (const uint8_t *)p; } + unsigned long size() const { return size_; } + }; + + +inline bool uid_in_ustar_range( const long long uid ) // also for gid + { return uid >= 0 && uid < 1 << 21; } + +inline bool time_in_ustar_range( const long long seconds ) + { return seconds >= 0 && seconds < 1LL << 33; } + + +/* The sign of the seconds field applies to the whole time value. + A nanoseconds value out of range means an invalid time. */ +class Etime // time since (or before) the epoch + { + long long sec_; + int nsec_; // range [0, 999_999_999] + +public: + Etime() : sec_( 0 ), nsec_( -1 ) {} + void reset() { sec_ = 0; nsec_ = -1; } + void set( const long long s ) { sec_ = s; nsec_ = 0; } + long long sec() const { return sec_; } + int nsec() const { return nsec_; } + bool isvalid() const { return nsec_ >= 0 && nsec_ <= 999999999; } + bool out_of_ustar_range() const + { return isvalid() && !time_in_ustar_range( sec_ ); } + + unsigned decimal_size() const; + unsigned print( char * const buf ) const; + bool parse( const char * const ptr, const char ** const tailp, + const long long size ); + }; + + +class Extended // stores metadata from/for extended records + { + static std::vector< std::string > unknown_keywords; // already diagnosed + std::string linkpath_; // these are the real metadata + std::string path_; + long long file_size_; // >= 0 && <= max_file_size + long long uid_, gid_; // may not fit in unsigned int + Etime atime_, mtime_; + + // cached sizes; if full_size_ < 0 they must be recalculated + mutable long long edsize_; // extended data size + mutable long long padded_edsize_; // edsize rounded up + mutable long long full_size_; // header + padded edsize + mutable long long linkpath_recsize_; + mutable long long path_recsize_; + mutable int file_size_recsize_; + mutable int uid_recsize_; + mutable int gid_recsize_; + mutable int atime_recsize_; + mutable int mtime_recsize_; + + // true if CRC present in parsed or formatted records + mutable bool crc_present_; + + void calculate_sizes() const; + void unknown_keyword( const char * const buf, + const unsigned long long size ) const; + +public: + static const std::string crc_record; + std::string removed_prefix; + + Extended() + : file_size_( 0 ), uid_( -1 ), gid_( -1 ), edsize_( 0 ), + padded_edsize_( 0 ), full_size_( 0 ), linkpath_recsize_( 0 ), + path_recsize_( 0 ), file_size_recsize_( 0 ), uid_recsize_( 0 ), + gid_recsize_( 0 ), atime_recsize_( 0 ), mtime_recsize_( 0 ), + crc_present_( false ) {} + + void reset() + { linkpath_.clear(); path_.clear(); file_size_ = 0; uid_ = -1; gid_ = -1; + atime_.reset(); mtime_.reset(); edsize_ = 0; padded_edsize_ = 0; + full_size_ = 0; linkpath_recsize_ = 0; path_recsize_ = 0; + file_size_recsize_ = 0; uid_recsize_ = 0; gid_recsize_ = 0; + atime_recsize_ = 0; mtime_recsize_ = 0; crc_present_ = false; + removed_prefix.clear(); } + + bool empty() const + { return linkpath_.empty() && path_.empty() && file_size_ == 0 && + uid_ < 0 && gid_ < 0 && + !atime_.out_of_ustar_range() && !mtime_.out_of_ustar_range(); } + + const std::string & linkpath() const { return linkpath_; } + const std::string & path() const { return path_; } + long long file_size() const { return file_size_; } + long long get_file_size_and_reset( const Tar_header header ); + long long get_uid() const { return uid_; } + long long get_gid() const { return gid_; } + const Etime & atime() const { return atime_; } + const Etime & mtime() const { return mtime_; } + + void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -1; } + void path( const char * const p ) { path_ = p; full_size_ = -1; } + void file_size( const long long fs ) { full_size_ = -1; + file_size_ = ( fs >= 0 && fs <= max_file_size ) ? fs : 0; } + bool set_uid( const long long id ) + { if( id >= 0 ) { uid_ = id; full_size_ = -1; } return id >= 0; } + bool set_gid( const long long id ) + { if( id >= 0 ) { gid_ = id; full_size_ = -1; } return id >= 0; } + void set_atime( const long long s ) { atime_.set( s ); full_size_ = -1; } + void set_mtime( const long long s ) { mtime_.set( s ); full_size_ = -1; } + + long long full_size() const + { if( full_size_ < 0 ) calculate_sizes(); return full_size_; } + + bool crc_present() const { return crc_present_; } + long long format_block( Resizable_buffer & rbuf ) const; + bool parse( const char * const buf, const unsigned long long edsize, + const bool permissive ); + void fill_from_ustar( const Tar_header header ); + }; + + +class CRC32 + { + uint32_t data[256]; // Table of CRCs of all 8-bit messages. + +public: + CRC32( const bool castagnoli = false ) + { + const unsigned cpol = 0x82F63B78U; // CRC32-C Castagnoli polynomial. + const unsigned ipol = 0xEDB88320U; // IEEE 802.3 Ethernet polynomial. + const unsigned poly = castagnoli ? cpol : ipol; + + for( unsigned n = 0; n < 256; ++n ) + { + unsigned c = n; + for( int k = 0; k < 8; ++k ) + { if( c & 1 ) c = poly ^ ( c >> 1 ); else c >>= 1; } + data[n] = c; + } + } + + void update_byte( uint32_t & crc, const uint8_t byte ) const + { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); } + + // about as fast as it is possible without messing with endianness + void update_buf( uint32_t & crc, const uint8_t * const buffer, + const int size ) const + { + uint32_t c = crc; + for( int i = 0; i < size; ++i ) + c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 ); + crc = c; + } + + uint32_t compute_crc( const uint8_t * const buffer, const int size ) const + { + uint32_t crc = 0xFFFFFFFFU; + for( int i = 0; i < size; ++i ) + crc = data[(crc^buffer[i])&0xFF] ^ ( crc >> 8 ); + return crc ^ 0xFFFFFFFFU; + } + + // Calculates the crc of size bytes except a window of 8 bytes at pos + uint32_t windowed_crc( const uint8_t * const buffer, const int pos, + const int size ) const + { + uint32_t crc = 0xFFFFFFFFU; + update_buf( crc, buffer, pos ); + update_buf( crc, buffer + pos + 8, size - pos - 8 ); + return crc ^ 0xFFFFFFFFU; + } + }; + + +struct Lzma_options + { + int dictionary_size; // 4 KiB .. 512 MiB + int match_len_limit; // 5 .. 273 + }; +const Lzma_options option_mapping[] = + { + { 65535, 16 }, // -0 + { 1 << 20, 5 }, // -1 + { 3 << 19, 6 }, // -2 + { 1 << 21, 8 }, // -3 + { 3 << 20, 12 }, // -4 + { 1 << 22, 20 }, // -5 + { 1 << 23, 36 }, // -6 + { 1 << 24, 68 }, // -7 + { 3 << 23, 132 }, // -8 + { 1 << 25, 273 } }; // -9 + + +enum { + min_dictionary_bits = 12, + min_dictionary_size = 1 << min_dictionary_bits, + max_dictionary_bits = 29, + max_dictionary_size = 1 << max_dictionary_bits, + min_member_size = 36, + min_data_size = 2 * min_dictionary_size, + max_data_size = 2 * max_dictionary_size }; + + +inline bool isvalid_ds( const unsigned dictionary_size ) + { return ( dictionary_size >= min_dictionary_size && + dictionary_size <= max_dictionary_size ); } + + +const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP" + +struct Lzip_header + { + enum { size = 6 }; + uint8_t data[size]; // 0-3 magic bytes + // 4 version + // 5 coded dictionary size + + bool verify_magic() const + { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); } + + bool verify_prefix( const int sz ) const // detect (truncated) header + { + for( int i = 0; i < sz && i < 4; ++i ) + if( data[i] != lzip_magic[i] ) return false; + return ( sz > 0 ); + } + + bool verify_corrupt() const // detect corrupt header + { + int matches = 0; + for( int i = 0; i < 4; ++i ) + if( data[i] == lzip_magic[i] ) ++matches; + return ( matches > 1 && matches < 4 ); + } + + uint8_t version() const { return data[4]; } + bool verify_version() const { return ( data[4] == 1 ); } + + unsigned dictionary_size() const + { + unsigned sz = ( 1 << ( data[5] & 0x1F ) ); + if( sz > min_dictionary_size ) + sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 ); + return sz; + } + + bool verify() const + { return verify_magic() && verify_version() && + isvalid_ds( dictionary_size() ); } + }; + + +struct Lzip_trailer + { + enum { size = 20 }; + uint8_t data[size]; // 0-3 CRC32 of the uncompressed data + // 4-11 size of the uncompressed data + // 12-19 member size including header and trailer + + unsigned data_crc() const + { + unsigned tmp = 0; + for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + + unsigned long long data_size() const + { + unsigned long long tmp = 0; + for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + + unsigned long long member_size() const + { + unsigned long long tmp = 0; + for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; } + return tmp; + } + + bool verify_consistency() const // check internal consistency + { + const unsigned crc = data_crc(); + const unsigned long long dsize = data_size(); + if( ( crc == 0 ) != ( dsize == 0 ) ) return false; + const unsigned long long msize = member_size(); + if( msize < min_member_size ) return false; + const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size; + if( mlimit > dsize && msize > mlimit ) return false; + const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1; + if( dlimit > msize && dsize > dlimit ) return false; + return true; + } + }; + + +enum Program_mode { m_none, m_append, m_compress, m_concatenate, m_create, + m_delete, m_diff, m_extract, m_list }; +enum Solidity { no_solid, bsolid, dsolid, asolid, solid }; +class Arg_parser; + +struct Cl_options // command line options + { + const Arg_parser & parser; + std::string archive_name; + std::string output_filename; + long long mtime; + long long uid; + long long gid; + Program_mode program_mode; + Solidity solidity; + int data_size; + int debug_level; + int level; // compression level, < 0 means uncompressed + int num_files; + int num_workers; // start this many worker threads + int out_slots; + bool dereference; + bool filenames_given; + bool ignore_ids; + bool ignore_overflow; + bool keep_damaged; + bool missing_crc; + bool mtime_set; + bool permissive; + bool preserve_permissions; + bool warn_newer; + + Cl_options( const Arg_parser & ap ) + : parser( ap ), mtime( 0 ), uid( -1 ), gid( -1 ), program_mode( m_none ), + solidity( bsolid ), data_size( 0 ), debug_level( 0 ), level( 6 ), + num_files( 0 ), num_workers( -1 ), out_slots( 64 ), dereference( false ), + filenames_given( false ), ignore_ids( false ), ignore_overflow( false ), + keep_damaged( false ), missing_crc( false ), mtime_set( false ), + permissive( false ), preserve_permissions( false ), warn_newer( false ) {} + + bool to_stdout() const { return output_filename == "-"; } + }; + +inline void set_retval( int & retval, const int new_val ) + { if( retval < new_val ) retval = new_val; } + +const char * const bad_magic_msg = "Bad magic number (file not in lzip format)."; +const char * const bad_dict_msg = "Invalid dictionary size in member header."; +const char * const corrupt_mm_msg = "Corrupt header in multimember file."; +const char * const trailing_msg = "Trailing data not allowed."; +const char * const bad_hdr_msg = "Corrupt or invalid tar header."; +const char * const gblrec_msg = "Error in global extended records."; +const char * const extrec_msg = "Error in extended records."; +const char * const miscrc_msg = "Missing CRC in extended records."; +const char * const misrec_msg = "Missing extended records."; +const char * const longrec_msg = "Extended records are too long."; +const char * const end_msg = "Archive ends unexpectedly."; +const char * const mem_msg = "Not enough memory."; +const char * const mem_msg2 = "Not enough memory. Try a lower compression level."; +const char * const fv_msg1 = "Format violation: extended header followed by EOA blocks."; +const char * const fv_msg2 = "Format violation: extended header followed by global header."; +const char * const fv_msg3 = "Format violation: consecutive extended headers found."; +const char * const posix_msg = "This does not look like a POSIX tar archive."; +const char * const posix_lz_msg = "This does not look like a POSIX tar.lz archive."; +const char * const eclosa_msg = "Error closing archive"; +const char * const eclosf_msg = "Error closing file"; +const char * const nfound_msg = "Not found in archive."; +const char * const seek_msg = "Seek error"; +const char * const werr_msg = "Write error"; +const char * const chdir_msg = "Error changing working directory"; + +// defined in common.cc +void xinit_mutex( pthread_mutex_t * const mutex ); +void xinit_cond( pthread_cond_t * const cond ); +void xdestroy_mutex( pthread_mutex_t * const mutex ); +void xdestroy_cond( pthread_cond_t * const cond ); +void xlock( pthread_mutex_t * const mutex ); +void xunlock( pthread_mutex_t * const mutex ); +void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex ); +void xsignal( pthread_cond_t * const cond ); +void xbroadcast( pthread_cond_t * const cond ); +unsigned long long parse_octal( const uint8_t * const ptr, const int size ); +int readblock( const int fd, uint8_t * const buf, const int size ); +int writeblock( const int fd, const uint8_t * const buf, const int size ); +bool nonempty_arg( const Arg_parser & parser, const int i ); + +// defined in common_decode.cc +bool block_is_zero( const uint8_t * const buf, const int size ); +bool format_member_name( const Extended & extended, const Tar_header header, + Resizable_buffer & rbuf, const bool long_format ); +bool show_member_name( const Extended & extended, const Tar_header header, + const int vlevel, Resizable_buffer & rbuf ); +bool check_skip_filename( const Cl_options & cl_opts, + std::vector< char > & name_pending, + const char * const filename ); +mode_t get_umask(); +bool make_path( const std::string & name ); + +// defined in compress.cc +int compress( const Cl_options & cl_opts ); + +// defined in create.cc +bool copy_file( const int infd, const int outfd, const long long max_size = -1 ); +bool writeblock_wrapper( const int outfd, const uint8_t * const buffer, + const int size ); +bool write_eoa_records( const int outfd, const bool compressed ); +const char * remove_leading_dotslash( const char * const filename, + std::string * const removed_prefixp, const bool dotdot = false ); +bool print_removed_prefix( const std::string & prefix, + std::string * const msgp = 0 ); +bool fill_headers( const char * const filename, Extended & extended, + Tar_header header, long long & file_size, const int flag ); +bool block_is_full( const long long extended_size, + const unsigned long long file_size, + const unsigned long long target_size, + unsigned long long & partial_data_size ); +void set_error_status( const int retval ); +int final_exit_status( int retval, const bool show_msg = true ); +unsigned ustar_chksum( const Tar_header header ); +bool verify_ustar_chksum( const Tar_header header ); +bool has_lz_ext( const std::string & name ); +int concatenate( const Cl_options & cl_opts ); +int encode( const Cl_options & cl_opts ); + +// defined in create_lz.cc +int encode_lz( const Cl_options & cl_opts, const char * const archive_namep, + const int outfd ); + +// defined in decode.cc +bool compare_file_type( std::string & estr, std::string & ostr, + const Cl_options & cl_opts, + const Extended & extended, const Tar_header header ); +class Archive_reader_base; +bool compare_file_contents( std::string & estr, std::string & ostr, + Archive_reader_base & ar, const long long file_size, + const char * const filename, const int infd2 ); +int decode( const Cl_options & cl_opts ); + +// defined in decode_lz.cc +struct Archive_descriptor; +int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad, + std::vector< char > & name_pending ); + +// defined in delete.cc +bool safe_seek( const int fd, const long long pos ); +int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad, + std::vector< char > & name_pending, const long long istream_pos, + const int outfd, int retval ); +int delete_members( const Cl_options & cl_opts ); + +// defined in delete_lz.cc +int delete_members_lz( const Cl_options & cl_opts, + const Archive_descriptor & ad, + std::vector< char > & name_pending, const int outfd ); + +// defined in exclude.cc +namespace Exclude { +void add_pattern( const std::string & arg ); +void clear(); +bool excluded( const char * const filename ); +} // end namespace Exclude + +// defined in extended.cc +extern const CRC32 crc32c; + +// defined in lzip_index.cc +int seek_read( const int fd, uint8_t * const buf, const int size, + const long long pos ); + +// defined in main.cc +extern int verbosity; +extern const char * const program_name; +struct stat; +int hstat( const char * const filename, struct stat * const st, + const bool dereference ); +int open_instream( const std::string & name ); +int open_outstream( const std::string & name, const bool create = true, + Resizable_buffer * const rbufp = 0, const bool force = true ); +void exit_fail_mt( const int retval = 1 ); // terminate the program +void show_error( const char * const msg, const int errcode = 0, + const bool help = false ); +bool format_error( Resizable_buffer & rbuf, const int errcode, + const char * const format, ... ); +void print_error( const int errcode, const char * const format, ... ); +void format_file_error( std::string & estr, const char * const filename, + const char * const msg, const int errcode = 0 ); +bool format_file_error( Resizable_buffer & rbuf, const char * const filename, + const char * const msg, const int errcode = 0 ); +void show_file_error( const char * const filename, const char * const msg, + const int errcode = 0 ); +void internal_error( const char * const msg ); diff --git a/testsuite/check.sh b/testsuite/check.sh new file mode 100755 index 0000000..348e447 --- /dev/null +++ b/testsuite/check.sh @@ -0,0 +1,1417 @@ +#! /bin/sh +# check script for Tarlz - Archiver with multimember lzip compression +# Copyright (C) 2013-2022 Antonio Diaz Diaz. +# +# This script is free software: you have unlimited permission +# to copy, distribute, and modify it. + +LC_ALL=C +export LC_ALL +objdir=`pwd` +testdir=`cd "$1" ; pwd` +TARLZ="${objdir}"/tarlz +framework_failure() { echo "failure in testing framework" ; exit 1 ; } + +if [ ! -f "${TARLZ}" ] || [ ! -x "${TARLZ}" ] ; then + echo "${TARLZ}: cannot execute" + exit 1 +fi + +[ -e "${TARLZ}" ] 2> /dev/null || + { + echo "$0: a POSIX shell is required to run the tests" + echo "Try bash -c \"$0 $1 $2\"" + exit 1 + } + +if [ -d tmp ] ; then rm -rf tmp ; fi +mkdir tmp +cd "${objdir}"/tmp || framework_failure + +in="${testdir}"/test.txt +in_lz="${testdir}"/test.txt.lz +in_tar="${testdir}"/test.txt.tar +in_tar_lz="${testdir}"/test.txt.tar.lz +inbad1="${testdir}"/test_bad1.txt +inbad2="${testdir}"/test_bad2.txt +test3="${testdir}"/test3.tar +test3_lz="${testdir}"/test3.tar.lz +test3dir="${testdir}"/test3_dir.tar +test3dir_lz="${testdir}"/test3_dir.tar.lz +test3dot_lz="${testdir}"/test3_dot.tar.lz +t155="${testdir}"/t155.tar +t155_lz="${testdir}"/t155.tar.lz +tlzit1="${testdir}"/tlz_in_tar1.tar +tlzit2="${testdir}"/tlz_in_tar2.tar +bad1="${testdir}"/test3_bad1.tar +bad2="${testdir}"/test3_bad2.tar +bad3="${testdir}"/test3_bad3.tar +bad4="${testdir}"/test3_bad4.tar +bad5="${testdir}"/test3_bad5.tar +bad1_lz="${testdir}"/test3_bad1.tar.lz +bad2_lz="${testdir}"/test3_bad2.tar.lz +bad3_lz="${testdir}"/test3_bad3.tar.lz +bad4_lz="${testdir}"/test3_bad4.tar.lz +bad5_lz="${testdir}"/test3_bad5.tar.lz +bad6_lz="${testdir}"/test3_bad6.tar.lz +eoa="${testdir}"/eoa_blocks.tar +eoa_lz="${testdir}"/eoa_blocks.tar.lz +fail=0 +lwarnc=0 +test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; } +cyg_symlink() { [ ${lwarnc} = 0 ] && + printf "\nwarning: your OS follows symbolic links to directories even when tarlz asks it not to\n$1" + lwarnc=1 ; } + +# Description of test files for tarlz: +# test.txt.tar.lz: 1 member (test.txt). +# t155.tar[.lz]: directory + 3 links + file + EOA, all with 155 char names +# t155_fv?.tar[.lz]: like t155.tar but with 3 kinds of format violations +# t155_fv1.tar[.lz]: extra extended header before EOA blocks +# t155_fv2.tar[.lz]: first extended header followed by global header +# t155_fv3.tar[.lz]: consecutive extended headers in last member +# t155_fv[456].tar.lz: like t155_fv[123].tar.lz but violation alone in member +# tar_in_tlz1.tar.lz: 2 members (test.txt.tar test3.tar) 3 lzip members +# tar_in_tlz2.tar.lz: 2 members (test.txt.tar test3.tar) 5 lzip members +# ts_in_link.tar.lz: 4 symbolic links (link[1-4]) to / /dir/ dir/ dir(107/) +# test_bad1.txt.tar.lz: truncated at offset 6000 (of 7495) +# test_bad2.txt.tar.lz: byte at offset 6000 changed from 0x56 to 0x46 +# test3.tar[.lz]: 3 members (foo bar baz) + 2 zeroed 512-byte blocks +# test3_dir.tar[.lz] like test3.tar but members /dir/foo /dir/bar /dir/baz +# test3_dot.tar.lz: 3 times 3 members ./foo ././bar ./././baz +# the 3 central members with filename in extended header +# test3_bad1.tar: byte at offset 259 changed from 't' to '0' (magic) +# test3_bad2.tar: byte at offset 1283 changed from 't' to '0' (magic) +# test3_bad3.tar: byte at offset 2559 changed from 0x00 to 0x20 (padding) +# test3_bad4.tar: byte at offset 1283 changed from 't' to '0' (magic) +# byte at offset 2307 changed from 't' to '0' (magic) +# test3_bad5.tar: 510 zeros + "LZ" prepended to test3.tar (bogus lz header) +# test3_bad1.tar.lz: byte at offset 2 changed from 'I' to 'i' (magic) +# test3_bad2.tar.lz: byte at offset 49 changed from 0x49 to 0x69 (mid stream) +# test3_bad3.tar.lz: byte at offset 176 changed from 0x7D to 0x6D (mid stream) +# test3_bad4.tar.lz: combined damage of test3_bad2.tar.lz and test3_bad3.tar.lz +# test3_bad5.tar.lz: [71-134] --> zeroed (first trailer + second header) +# test3_bad6.tar.lz: 510 zeros prepended to test3.tar.lz (header in two blocks) +# test3_eoa?.tar: like test3_eoa?.tar.lz but uncompressed +# test3_eoa1.tar.lz: test3.tar.lz without EOA blocks +# test3_eoa2.tar.lz: test3.tar.lz with only one EOA block +# test3_eoa3.tar.lz: test3.tar.lz with one zeroed block between foo and bar +# test3_eoa4.tar.lz: test3.tar.lz ended by extended header without EOA blocks +# test3_eoa5.tar.lz: test3.tar.lz split extended bar member, without EOA blocks +# test3_em?.tar.lz: test3.tar.lz with one empty lzip member at each position +# test3_em6.tar.lz: test3.tar.lz preceded by four empty lzip members +# test3_gh?.tar: test3.tar with global header at each position +# test3_gh?.tar.lz: test3.tar.lz with global before bar split in 4 ways +# test3_gh5.tar.lz: test3.tar.lz with global in lzip member before foo +# test3_gh6.tar.lz: test3.tar.lz with global before foo in same member +# test3_nn.tar[.lz]: test3.tar[.lz] with zeroed name (no name) in bar member +# test3_sm?.tar.lz: test3.tar.lz with extended bar member split in 4 ways +# tlz_in_tar1.tar: 1 member (test3.tar.lz) first magic damaged +# tlz_in_tar2.tar: 2 members (foo test3.tar.lz) first magic damaged +# ug32chars.tar.lz: 1 member (foo) with 32-character owner and group names +# ug32767.tar.lz: 1 member (foo) with numerical-only owner and group + +# Note that multi-threaded --list succeeds with test_bad2.txt.tar.lz and +# test3_bad3.tar.lz because their headers are intact. + +"${TARLZ}" --check-lib # just print warning +[ $? != 2 ] || test_failed $LINENO # unless bad lzlib.h +printf "testing tarlz-%s..." "$2" + +"${TARLZ}" -q -tf "${in}" +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -q -tf "${in_lz}" +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -q -tf "${in_tar_lz}" -f "${in_tar_lz}" +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -tf nx_file +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -tf 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -cf out.tar.lz +[ $? = 1 ] || test_failed $LINENO +[ ! -e out.tar.lz ] || test_failed $LINENO +"${TARLZ}" -rf out.tar.lz || test_failed $LINENO +[ ! -e out.tar.lz ] || test_failed $LINENO +"${TARLZ}" -r || test_failed $LINENO +"${TARLZ}" --uncompressed -q -rf out.tar "${in}" +[ $? = 1 ] || test_failed $LINENO +[ ! -e out.tar ] || test_failed $LINENO +cat "${test3_lz}" > test.tar.lz || framework_failure +"${TARLZ}" --uncompressed -q -rf test.tar.lz "${in}" +[ $? = 2 ] || test_failed $LINENO +cmp "${test3_lz}" test.tar.lz || test_failed $LINENO +rm -f test.tar.lz || framework_failure +cat "${test3}" > test.tar || framework_failure +"${TARLZ}" -q -rf test.tar "${in}" +[ $? = 2 ] || test_failed $LINENO +cmp "${test3}" test.tar || test_failed $LINENO +rm -f test.tar || framework_failure +"${TARLZ}" -q -c "${in}" nx_file > /dev/null +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -c -C nx_dir "${in}" +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -x -C nx_dir "${test3_lz}" +[ $? = 1 ] || test_failed $LINENO +touch empty.tar.lz empty.tlz # list an empty lz file +"${TARLZ}" -q -tf empty.tar.lz +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -q -tf empty.tlz +[ $? = 2 ] || test_failed $LINENO +rm -f empty.tar.lz empty.tlz || framework_failure +"${TARLZ}" -q -cd # test mixed operations +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -cr +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -ct +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -cx +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -tx +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -ctx +[ $? = 1 ] || test_failed $LINENO +for i in A c d r t x -delete ; do # test -o with operations other than -z + "${TARLZ}" -q -$i -o - + [ $? = 1 ] || test_failed $LINENO $i +done +"${TARLZ}" -q -z -f - +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -z . +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -z -o - --uncompressed "${test3}" > /dev/null 2>&1 +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -q -tf "${in_tar_lz}" "" # empty non-option argument +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" --help > /dev/null || test_failed $LINENO +"${TARLZ}" -V > /dev/null || test_failed $LINENO +"${TARLZ}" --bad_option -tf "${test3_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" -tf 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +bad_dates='@-9223372036854775809 @9223372036854775808 + -2147481749-01-01T00:00:00 2147483648-01-01T00:00:00 + 2017-10-01T 2017-10 ./nx_file' +for i in ${bad_dates} ; do + "${TARLZ}" -c --mtime="$i" "${in}" > /dev/null 2>&1 + [ $? = 1 ] || test_failed $LINENO "$i" +done +"${TARLZ}" --owner=invalid_owner_name -tf "${test3_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO +"${TARLZ}" --group=invalid_group_name -tf "${test3_lz}" 2> /dev/null +[ $? = 1 ] || test_failed $LINENO + +printf "\ntesting --list and --extract..." + +# test --list and --extract +"${TARLZ}" -tf "${eoa_lz}" --missing-crc || test_failed $LINENO +"${TARLZ}" -xf "${eoa_lz}" --missing-crc || test_failed $LINENO +"${TARLZ}" -C nx_dir -tf "${in_tar}" > /dev/null || test_failed $LINENO +"${TARLZ}" -xf "${in_tar}" --missing-crc || test_failed $LINENO +cmp "${in}" test.txt || test_failed $LINENO +rm -f test.txt || framework_failure +"${TARLZ}" -tf "${in_tar_lz}" --missing-crc > /dev/null || test_failed $LINENO +for i in 0 2 6 ; do + "${TARLZ}" -n$i -xf "${in_tar_lz}" --missing-crc || test_failed $LINENO $i + cmp "${in}" test.txt || test_failed $LINENO $i + rm -f test.txt || framework_failure +done + +# test3 reference files for -t and -tv (list3, vlist3) +"${TARLZ}" -tf "${test3}" > list3 || test_failed $LINENO +"${TARLZ}" -tvf "${test3}" > vlist3 || test_failed $LINENO +for i in 0 2 6 ; do + "${TARLZ}" -n$i -tf "${test3_lz}" > out || test_failed $LINENO $i + diff -u list3 out || test_failed $LINENO $i + "${TARLZ}" -n$i -tvf "${test3_lz}" > out || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i +done +rm -f out || framework_failure + +# test3 reference files for cmp +cat "${testdir}"/rfoo > cfoo || framework_failure +cat "${testdir}"/rbar > cbar || framework_failure +cat "${testdir}"/rbaz > cbaz || framework_failure + +# test --list and --extract test3 +rm -f foo bar baz || framework_failure +"${TARLZ}" -xf "${test3}" --missing-crc || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +for i in 0 2 6 ; do + "${TARLZ}" -n$i -xf "${test3_lz}" --missing-crc || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -n$i -tvf "${test3_lz}" ./foo ./bar ./baz > out 2> /dev/null || + test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + rm -f out || framework_failure + "${TARLZ}" -q -n$i -xf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -n$i -xf "${test3_lz}" foo/ bar// baz/// || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dot_lz}" --missing-crc || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -tf "${test3dot_lz}" foo bar baz || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${test3dot_lz}" foo bar baz || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure +done + +for i in "${test3dir}" "${test3dir_lz}" ; do + "${TARLZ}" -q -tf "$i" --missing-crc || test_failed $LINENO "$i" + "${TARLZ}" -q -xf "$i" --missing-crc || test_failed $LINENO "$i" + cmp cfoo dir/foo || test_failed $LINENO "$i" + cmp cbar dir/bar || test_failed $LINENO "$i" + cmp cbaz dir/baz || test_failed $LINENO "$i" + rm -rf dir || framework_failure + "${TARLZ}" -q -tf "$i" dir || test_failed $LINENO "$i" + "${TARLZ}" -q -xf "$i" dir || test_failed $LINENO "$i" + cmp cfoo dir/foo || test_failed $LINENO "$i" + cmp cbar dir/bar || test_failed $LINENO "$i" + cmp cbaz dir/baz || test_failed $LINENO "$i" + rm -rf dir || framework_failure + "${TARLZ}" -q -tf "$i" dir/foo dir/baz || test_failed $LINENO "$i" + "${TARLZ}" -q -xf "$i" dir/foo dir/baz || test_failed $LINENO "$i" + cmp cfoo dir/foo || test_failed $LINENO "$i" + [ ! -e dir/bar ] || test_failed $LINENO "$i" + cmp cbaz dir/baz || test_failed $LINENO "$i" + rm -rf dir || framework_failure +done + +# test --extract --exclude +"${TARLZ}" -xf "${test3}" --exclude='f*o' --exclude=baz || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f foo bar baz || framework_failure +for i in 0 2 6 ; do + "${TARLZ}" -n$i -xf "${test3_lz}" --exclude=bar || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + [ ! -e bar ] || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='?ar' || test_failed $LINENO $i + cmp cfoo dir/foo || test_failed $LINENO $i + [ ! -e dir/bar ] || test_failed $LINENO $i + cmp cbaz dir/baz || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude=dir/bar || test_failed $LINENO $i + cmp cfoo dir/foo || test_failed $LINENO $i + [ ! -e dir/bar ] || test_failed $LINENO $i + cmp cbaz dir/baz || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude=dir || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='dir/*' || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='[bf][ao][orz]' || + test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + rm -rf dir || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='*o' dir/foo || + test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + rm -rf dir || framework_failure +done + +# test --list and --extract EOA +"${TARLZ}" -tvf "${testdir}"/test3_eoa1.tar > out 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +diff -u vlist3 out || test_failed $LINENO +"${TARLZ}" -tvf "${testdir}"/test3_eoa2.tar > out || test_failed $LINENO +diff -u vlist3 out || test_failed $LINENO +"${TARLZ}" -q -tf "${testdir}"/test3_eoa3.tar || test_failed $LINENO +"${TARLZ}" -tvf "${testdir}"/test3_eoa4.tar > out 2> /dev/null +[ $? = 2 ] || test_failed $LINENO +diff -u vlist3 out || test_failed $LINENO +for i in 0 2 6 ; do + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa1.tar.lz > out 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa2.tar.lz > out || + test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -q -n$i -tf "${testdir}"/test3_eoa3.tar.lz || + test_failed $LINENO $i + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa4.tar.lz > out 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa5.tar.lz > out 2> /dev/null + [ $? = 2 ] || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i +done +rm -f out || framework_failure +# +"${TARLZ}" -q -xf "${testdir}"/test3_eoa1.tar +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -xf "${testdir}"/test3_eoa2.tar || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -xf "${testdir}"/test3_eoa3.tar || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf "${testdir}"/test3_eoa4.tar +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +# +for i in 0 2 6 ; do + "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eoa1.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -n$i -xf "${testdir}"/test3_eoa2.tar.lz || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eoa4.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eoa5.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure +done +"${TARLZ}" -n0 -xf "${testdir}"/test3_eoa3.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO $i +[ ! -e bar ] || test_failed $LINENO $i +[ ! -e baz ] || test_failed $LINENO $i +rm -f foo bar baz || framework_failure + +# test --list and --extract tar in tar.lz +for i in "${testdir}"/tar_in_tlz1.tar.lz "${testdir}"/tar_in_tlz2.tar.lz ; do + for j in 0 2 6 ; do + "${TARLZ}" -tf "$i" -n$j > out$j || + test_failed $LINENO "$i $j" + "${TARLZ}" -tvf "$i" -n$j > outv$j || + test_failed $LINENO "$i $j" + done + diff -u out0 out2 || test_failed $LINENO "$i" + diff -u out0 out6 || test_failed $LINENO "$i" + diff -u out2 out6 || test_failed $LINENO "$i" + diff -u outv0 outv2 || test_failed $LINENO "$i" + diff -u outv0 outv6 || test_failed $LINENO "$i" + diff -u outv2 outv6 || test_failed $LINENO "$i" + rm -f out0 out2 out6 outv0 outv2 outv6 || framework_failure + for j in 0 2 6 ; do + "${TARLZ}" -xf "$i" -n$j || test_failed $LINENO "$i $j" + cmp "${in_tar}" test.txt.tar || test_failed $LINENO "$i $j" + cmp "${test3}" test3.tar || test_failed $LINENO "$i $j" + rm -f test.txt.tar test3.tar || framework_failure + done +done + +# test --list and --extract with global headers uncompressed +for i in gh1 gh2 gh3 gh4 ; do + "${TARLZ}" -tf "${testdir}"/test3_${i}.tar > out || test_failed $LINENO $i + diff -u list3 out || test_failed $LINENO $i + "${TARLZ}" -tvf "${testdir}"/test3_${i}.tar > out || test_failed $LINENO $i + diff -u vlist3 out || test_failed $LINENO $i + "${TARLZ}" -xf "${testdir}"/test3_${i}.tar || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz out || framework_failure +done + +# test --list and --extract with empty lzip members, global headers and +# extended tar members split among lzip members +for i in em1 em2 em3 em4 em5 em6 gh1 gh2 gh3 gh4 gh5 gh6 sm1 sm2 sm3 sm4 ; do + for j in 0 2 6 ; do + "${TARLZ}" -n$j -tf "${testdir}"/test3_${i}.tar.lz > out || + test_failed $LINENO "$i $j" + diff -u list3 out || test_failed $LINENO "$i $j" + "${TARLZ}" -n$j -tvf "${testdir}"/test3_${i}.tar.lz > out || + test_failed $LINENO "$i $j" + diff -u vlist3 out || test_failed $LINENO "$i $j" + done + rm -f out || framework_failure + for j in 0 2 6 ; do + "${TARLZ}" -n$j -xf "${testdir}"/test3_${i}.tar.lz || + test_failed $LINENO "$i $j" + cmp cfoo foo || test_failed $LINENO "$i $j" + cmp cbar bar || test_failed $LINENO "$i $j" + cmp cbaz baz || test_failed $LINENO "$i $j" + rm -f foo bar baz || framework_failure + done +done +rm -f list3 vlist3 || framework_failure + +printf "\ntesting --concatenate..." + +# test --concatenate compressed +cat "${in}" > out.tar.lz || framework_failure # invalid tar.lz +"${TARLZ}" -Aqf out.tar.lz "${test3_lz}" +[ $? = 2 ] || test_failed $LINENO +cat "${in_tar_lz}" > out.tar.lz || framework_failure +"${TARLZ}" -Af out.tar.lz "${test3_lz}" || test_failed $LINENO +"${TARLZ}" -xf out.tar.lz || test_failed $LINENO +cmp "${in}" test.txt || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f test.txt foo bar baz || framework_failure +touch aout.tar.lz || framework_failure # concatenate to empty file +"${TARLZ}" -Aqf aout.tar.lz "${in_tar}" +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -Af aout.tar.lz || test_failed $LINENO # concatenate nothing +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -Aqf aout.tar.lz aout.tar.lz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -Aq "${in_tar_lz}" "${test3}" > aout.tar.lz # to stdout +[ $? = 2 ] || test_failed $LINENO +cmp "${in_tar_lz}" aout.tar.lz || test_failed $LINENO +"${TARLZ}" -A "${in_tar_lz}" "${test3_lz}" > aout.tar.lz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +cat "${eoa_lz}" > aout.tar.lz || framework_failure +"${TARLZ}" -Aqf aout.tar.lz "${in_tar}" # concatenate to empty archive +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +cat "${in_tar_lz}" > aout.tar.lz || framework_failure +"${TARLZ}" -Aqf aout.tar.lz "${test3_lz}" "${test3}" +[ $? = 2 ] || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f aout.tar.lz || framework_failure +touch aout.tar.lz || framework_failure # --exclude +"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" --exclude 'test3*' || + test_failed $LINENO +"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" --exclude '*txt*' || + test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f out.tar.lz aout.tar.lz || framework_failure + +# test --concatenate uncompressed +cat "${in}" > out.tar || framework_failure # invalid tar +"${TARLZ}" -Aqf out.tar "${test3}" +[ $? = 2 ] || test_failed $LINENO +cat "${in_tar}" > out.tar || framework_failure +"${TARLZ}" -Af out.tar "${test3}" || test_failed $LINENO +"${TARLZ}" -xf out.tar || test_failed $LINENO +cmp "${in}" test.txt || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f test.txt foo bar baz || framework_failure +touch aout.tar || framework_failure # concatenate to empty file +"${TARLZ}" -Aqf aout.tar "${in_tar_lz}" +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -Af aout.tar "${in_tar}" "${test3}" || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" -Af aout.tar || test_failed $LINENO # concatenate nothing +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" -Aqf aout.tar aout.tar || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" -Aq "${in_tar}" "${test3_lz}" > aout.tar # to stdout +[ $? = 2 ] || test_failed $LINENO +cmp "${in_tar}" aout.tar || test_failed $LINENO +"${TARLZ}" -A "${in_tar}" "${test3}" > aout.tar || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +cat "${eoa}" > aout.tar || framework_failure # concatenate to empty archive +"${TARLZ}" -Aqf aout.tar "${in_tar_lz}" +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -Af aout.tar "${in_tar}" "${test3}" || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +cat "${in_tar}" > aout.tar || framework_failure +"${TARLZ}" -Aqf aout.tar "${test3}" "${test3_lz}" +[ $? = 2 ] || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +rm -f aout.tar || framework_failure +touch aout.tar || framework_failure # --exclude +"${TARLZ}" -Af aout.tar "${test3}" "${in_tar}" --exclude 'test3*' || + test_failed $LINENO +"${TARLZ}" -Af aout.tar "${test3}" "${in_tar}" --exclude '*txt*' || + test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +rm -f out.tar aout.tar || framework_failure + +printf "\ntesting --create..." + +# test --create +cat "${in}" > test.txt || framework_failure +"${TARLZ}" --warn-newer -0 -cf out.tar.lz test.txt || test_failed $LINENO +rm -f test.txt || framework_failure +"${TARLZ}" -xf out.tar.lz --missing-crc || test_failed $LINENO +cmp "${in}" test.txt || test_failed $LINENO +cat "${in}" > test.txt || framework_failure +"${TARLZ}" --warn-newer --uncompressed -cf out.tar test.txt || test_failed $LINENO +rm -f test.txt || framework_failure +"${TARLZ}" -xf out.tar --missing-crc || test_failed $LINENO +cmp "${in}" test.txt || test_failed $LINENO +rm -f test.txt out.tar out.tar.lz || framework_failure + +cat cfoo > foo || framework_failure +rm -f bar || framework_failure +cat cbaz > baz || framework_failure +"${TARLZ}" -0 -q -cf out.tar.lz foo bar baz +[ $? = 1 ] || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -xf out.tar.lz --missing-crc || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf out.tar.lz bar +[ $? = 1 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f out.tar.lz || framework_failure + +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +"${TARLZ}" -0 -cf out.tar.lz foo bar baz --out-slots=1 || test_failed $LINENO +"${TARLZ}" -0 -q -cf aout.tar.lz foo bar aout.tar.lz baz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO # test reproducible +rm -f aout.tar.lz || framework_failure +# +"${TARLZ}" -0 -cf aout.tar.lz foo bar baz -C / || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f aout.tar.lz || framework_failure +"${TARLZ}" -0 -C / -cf aout.tar.lz -C "${objdir}"/tmp foo bar baz || + test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f aout.tar.lz || framework_failure +"${TARLZ}" --asolid -0 -cf aout.tar.lz foo bar baz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f aout.tar.lz || framework_failure +"${TARLZ}" -0 -q -cf aout.tar.lz foo/ ./bar ./baz/ || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f aout.tar.lz || framework_failure +mkdir dir1 || framework_failure +"${TARLZ}" -C dir1 -xf out.tar.lz || test_failed $LINENO +cmp cfoo dir1/foo || test_failed $LINENO +cmp cbar dir1/bar || test_failed $LINENO +cmp cbaz dir1/baz || test_failed $LINENO +rm -f aout.tar.lz foo bar baz || framework_failure +"${TARLZ}" -C dir1 -0 -cf aout.tar.lz foo bar baz || test_failed $LINENO +"${TARLZ}" -xf aout.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f aout.tar.lz foo bar baz || framework_failure +"${TARLZ}" -C dir1 -0 -c foo bar baz | "${TARLZ}" -x || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f dir1/foo dir1/bar dir1/baz || framework_failure +"${TARLZ}" -0 -c foo bar baz | "${TARLZ}" -C dir1 -x || test_failed $LINENO +cmp cfoo dir1/foo || test_failed $LINENO +cmp cbar dir1/bar || test_failed $LINENO +cmp cbaz dir1/baz || test_failed $LINENO +rm -f dir1/foo dir1/bar dir1/baz || framework_failure +"${TARLZ}" -0 -c foo bar baz | "${TARLZ}" -x foo bar baz -C dir1 || + test_failed $LINENO +cmp cfoo dir1/foo || test_failed $LINENO +cmp cbar dir1/bar || test_failed $LINENO +cmp cbaz dir1/baz || test_failed $LINENO +rm -f foo dir1/bar baz || framework_failure +"${TARLZ}" -0 -cf aout.tar.lz -C dir1 foo -C .. bar -C dir1 baz || + test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -0 -cf aout.tar.lz dir1/foo dir1/baz || test_failed $LINENO +rm -rf dir1 bar || framework_failure +"${TARLZ}" -xf aout.tar.lz dir1 || test_failed $LINENO +cmp cfoo dir1/foo || test_failed $LINENO +cmp cbaz dir1/baz || test_failed $LINENO +rm -rf dir1 || framework_failure +rm -f out.tar.lz aout.tar.lz || framework_failure + +# test --create --exclude +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +"${TARLZ}" -0 -cf out.tar.lz foo bar baz --exclude 'ba?' || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -xf out.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f out.tar.lz foo bar baz || framework_failure +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +"${TARLZ}" --un -cf out.tar foo bar baz --exclude 'ba*' || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -xf out.tar || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f out.tar foo bar baz || framework_failure + +# test --create --mtime +dates='@-9223372036854775808 @-9223372036854775807 + -2147481748-12-31T23:59:59 -1970-01-01T00:00:00 + 0000-01-01T00:00:00 0000-01-01T00:00:01 0000-01-02T00:00:00 + 1697-10-17T11:03:27 1697-10-17T11:03:28 1697-10-17T11:03:29 + 1833-11-24T17:31:43 1833-11-24T17:31:44 1833-11-24T17:31:45 + 1901-12-13T20:45:51 1901-12-13T20:45:52 1901-12-13T20:45:53 + 1901-12-14T20:45:51 + 1969-12-31T23:59:58 1969-12-31T23:59:59 + 1970-01-01T00:00:00 1970-01-01T00:00:01 @0 + 2038-01-18T03:14:07 2038-01-19T03:14:07 2038-01-19T03:14:08 + 2106-02-07T06:28:15 2106-02-07T06:28:16 + 2242-03-16T12:56:31 2242-03-16T12:56:32 @8589934591 @8589934592 + 9999-12-31T23:59:58 9999-12-31T23:59:59 + 2147483647-12-31T23:59:59 @9223372036854775807' +touch -d 2022-01-05T12:22:13 bar || framework_failure +for i in ${dates} @-8Ei '2017-10-01 09:00:00' '2017-10-1 9:0:0' \ + '2017-10-01 09:00' '2017-10-01 09' 2017-10-01 ./bar ; do + touch foo || framework_failure + "${TARLZ}" --un -cf out.tar --mtime="$i" foo || test_failed $LINENO "$i" + "${TARLZ}" -q -df out.tar && test_failed $LINENO "$i" + "${TARLZ}" -xf out.tar || test_failed $LINENO "$i" + "${TARLZ}" -df out.tar --ignore-overflow || test_failed $LINENO "$i" +done +rm -f out.tar foo bar || framework_failure + +mkdir dir || framework_failure +for i in ${dates} ; do + # Skip a time stamp $i if it's out of range for this platform, + # of if it uses a notation that this platform does not recognize. + touch -d $i dir/f$i >/dev/null 2>&1 || continue +done +"${TARLZ}" --uncompressed -cf out.tar dir || test_failed $LINENO +"${TARLZ}" -df out.tar || test_failed $LINENO +rm -rf out.tar dir || framework_failure + +printf "\ntesting --diff..." + +# test --diff +"${TARLZ}" -xf "${test3_lz}" || test_failed $LINENO +"${TARLZ}" --uncompressed -cf out.tar foo || test_failed $LINENO +"${TARLZ}" --uncompressed -cf aout.tar foo --anonymous || test_failed $LINENO +if cmp out.tar aout.tar > /dev/null ; then + printf "\nwarning: '--diff' test can't be run as root.\n" +else + for i in 0 2 6 ; do + "${TARLZ}" -n$i -xf "${test3_lz}" || test_failed $LINENO $i + "${TARLZ}" -n$i -df "${test3_lz}" > out$i + [ $? = 1 ] || test_failed $LINENO $i + "${TARLZ}" -n$i -df "${test3_lz}" --ignore-ids || test_failed $LINENO $i + "${TARLZ}" -n$i -df "${test3_lz}" --exclude '*' || test_failed $LINENO $i + "${TARLZ}" -n$i -df "${in_tar_lz}" --exclude '*' || test_failed $LINENO $i + rm -f bar || framework_failure + "${TARLZ}" -n$i -df "${test3_lz}" foo baz --ignore-ids || + test_failed $LINENO $i + "${TARLZ}" -n$i -df "${test3_lz}" --exclude bar --ignore-ids || + test_failed $LINENO $i + rm -f foo baz || framework_failure + "${TARLZ}" -q -n$i -xf "${test3dir_lz}" || test_failed $LINENO $i + "${TARLZ}" -q -n$i -df "${test3dir_lz}" --ignore-ids || + test_failed $LINENO $i + "${TARLZ}" -q -n$i -df "${test3dir_lz}" dir --ignore-ids || + test_failed $LINENO $i + "${TARLZ}" -n$i -df "${test3_lz}" --ignore-ids -C dir || + test_failed $LINENO $i + rm -rf dir || framework_failure + done + cmp out0 out2 || test_failed $LINENO + cmp out0 out6 || test_failed $LINENO + rm -f out0 out2 out6 || framework_failure +fi +rm -f out.tar aout.tar foo bar baz || framework_failure + +printf "\ntesting --delete..." + +# test --delete +cat "${in}" > out.tar || framework_failure # invalid tar +"${TARLZ}" -q -f out.tar --delete foo +[ $? = 2 ] || test_failed $LINENO +rm -f out.tar || framework_failure +cat "${in}" > out.tar.lz || framework_failure # invalid tar.lz +"${TARLZ}" -q -f out.tar.lz --delete foo +[ $? = 2 ] || test_failed $LINENO +cat "${in_lz}" > out.tar.lz || framework_failure # invalid tar.lz +"${TARLZ}" -q -f out.tar.lz --delete foo +[ $? = 2 ] || test_failed $LINENO +rm -f out.tar.lz || framework_failure + +for e in "" .lz ; do + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete test.txt || test_failed $LINENO $e + cmp "${test3}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete || test_failed $LINENO $e # delete nothing + cmp "${test3}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --delete nx_file + [ $? = 1 ] || test_failed $LINENO $e + cmp "${test3}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --delete test.txt || test_failed $LINENO $e + cmp "${test3dir}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --delete dir || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del dir/foo dir/bar dir/baz || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del dir/foo dir/baz || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e > /dev/null && test_failed $LINENO $e + "${TARLZ}" -q -f out.tar$e --del dir/bar || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete foo bar baz || test_failed $LINENO $e + cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --del test.txt foo bar baz || test_failed $LINENO $e + cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + for i in test.txt foo bar baz ; do + "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i" + done + cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + for i in baz bar foo test.txt ; do + "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i" + done + cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e + for i in foo bar test.txt baz ; do + "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i" + done + cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -A "${in_tar}"$e "${t155}"$e "${test3}"$e > out.tar$e || + test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --del baz foo test.txt bar || test_failed $LINENO $e + cmp "${t155}"$e out.tar$e || test_failed $LINENO $e + "${TARLZ}" -f out.tar$e --delete link || test_failed $LINENO $e + "${TARLZ}" -q -tf out.tar$e || test_failed $LINENO $e + cmp "${t155}"$e out.tar$e > /dev/null && test_failed $LINENO $e + rm -f out.tar$e || framework_failure +done + +# test --delete individual member after collective member +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +cat "${in}" > test.txt || framework_failure +"${TARLZ}" -0 -cf out.tar.lz foo bar baz --asolid || test_failed $LINENO +"${TARLZ}" -0 -rf out.tar.lz test.txt || test_failed $LINENO +rm -f foo bar baz test.txt || framework_failure +for i in foo bar baz ; do + "${TARLZ}" -q -f out.tar.lz --delete $i + [ $? = 2 ] || test_failed $LINENO $i +done +"${TARLZ}" -f out.tar.lz --delete test.txt || test_failed $LINENO +"${TARLZ}" -xf out.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +[ ! -e test.txt ] || test_failed $LINENO +rm -f out.tar.lz foo bar baz test.txt || framework_failure + +# test --delete with empty lzip member, global header +for i in 1 2 3 4 5 6 ; do + cat "${testdir}"/test3_em${i}.tar.lz > out.tar.lz || framework_failure + for j in foo bar baz ; do + "${TARLZ}" -f out.tar.lz --delete $j || test_failed $LINENO "$i $j" + done + rm -f out.tar.lz || framework_failure +done +cat "${testdir}"/test3_gh5.tar.lz > out.tar.lz || framework_failure +for i in foo bar baz ; do + "${TARLZ}" -f out.tar.lz --delete $i || test_failed $LINENO $i +done +rm -f out.tar.lz || framework_failure +for i in 1 2 3 4 ; do + cat "${testdir}"/test3_gh${i}.tar > out.tar || framework_failure + for j in foo bar baz ; do + "${TARLZ}" -f out.tar --delete $j || test_failed $LINENO "$i $j" + done + rm -f out.tar || framework_failure +done + +printf "\ntesting --dereference..." + +# test --dereference +touch dummy_file || framework_failure +if ln dummy_file dummy_link 2> /dev/null && + ln -s dummy_file dummy_slink 2> /dev/null ; then + ln_works=yes +else + printf "\nwarning: skipping link test: 'ln' does not work on your system.\n" +fi +rm -f dummy_slink dummy_link dummy_file || framework_failure +# +if [ "${ln_works}" = yes ] ; then + mkdir dir || framework_failure + cat cfoo > dir/foo || framework_failure + cat cbar > dir/bar || framework_failure + cat cbaz > dir/baz || framework_failure + ln -s dir dir_link || framework_failure + "${TARLZ}" -0 -cf out1 dir_link || test_failed $LINENO + "${TARLZ}" --un -cf out2 dir_link || test_failed $LINENO + "${TARLZ}" -0 -n0 -cf out3 dir_link || test_failed $LINENO + "${TARLZ}" -0 -h -cf hout1 dir_link || test_failed $LINENO + "${TARLZ}" --un -h -cf hout2 dir_link || test_failed $LINENO + "${TARLZ}" -0 -n0 -h -cf hout3 dir_link || test_failed $LINENO + rm -rf dir dir_link || framework_failure + for i in 1 2 3 ; do + "${TARLZ}" -xf out$i --exclude='dir_link/*' dir_link || + test_failed $LINENO $i # Cygwin stores dir_link/* + [ -h dir_link ] || test_failed $LINENO $i + "${TARLZ}" -q -tf out$i dir_link/foo && cyg_symlink $LINENO $i + "${TARLZ}" -q -tf out$i dir_link/bar && cyg_symlink $LINENO $i + "${TARLZ}" -q -tf out$i dir_link/baz && cyg_symlink $LINENO $i + rm -rf dir_link out$i || framework_failure + "${TARLZ}" -xf hout$i || test_failed $LINENO $i + [ -d dir_link ] || test_failed $LINENO $i + cmp cfoo dir_link/foo || test_failed $LINENO $i + cmp cbar dir_link/bar || test_failed $LINENO $i + cmp cbaz dir_link/baz || test_failed $LINENO $i + rm -rf dir_link hout$i || framework_failure + done +fi + +printf "\ntesting --append..." + +# test --append compressed +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +"${TARLZ}" -0 -cf out.tar.lz foo bar baz --out-slots=1024 || test_failed $LINENO +"${TARLZ}" -0 -cf nout.tar.lz foo bar baz --no-solid || test_failed $LINENO +"${TARLZ}" -0 -cf aout.tar.lz foo || test_failed $LINENO +"${TARLZ}" -0 -rf aout.tar.lz bar baz --no-solid || test_failed $LINENO +cmp nout.tar.lz aout.tar.lz || test_failed $LINENO +rm -f nout.tar.lz aout.tar.lz || framework_failure +touch aout.tar || framework_failure # wrong extension empty file +"${TARLZ}" -0 -rf aout.tar foo bar baz || test_failed $LINENO +cmp out.tar.lz aout.tar || test_failed $LINENO +rm -f aout.tar || framework_failure +touch aout.tar.lz || framework_failure # append to empty file +"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -0 -rf aout.tar.lz || test_failed $LINENO # append nothing +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -0 -rf aout.tar.lz -C nx_dir || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -0 -q -rf aout.tar.lz nx_file +[ $? = 1 ] || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -0 -q -rf aout.tar.lz aout.tar.lz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" -0 -r foo bar baz > aout.tar.lz || test_failed $LINENO # to stdout +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz # wrong extension archive +[ $? = 2 ] || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +cat "${eoa_lz}" > aout.tar.lz || framework_failure # append to empty archive +"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz # wrong extension empty archive +[ $? = 2 ] || test_failed $LINENO +cmp out.tar.lz aout.tar.lz || test_failed $LINENO +rm -f out.tar.lz aout.tar.lz || framework_failure + +# test --append --uncompressed +"${TARLZ}" --un -cf out.tar foo bar baz || test_failed $LINENO +"${TARLZ}" --un -cf aout.tar foo || test_failed $LINENO +"${TARLZ}" --un -rf aout.tar foo bar baz --exclude foo || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +rm -f aout.tar || framework_failure +touch aout.tar.lz empty || framework_failure # wrong extension empty file +"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz +[ $? = 2 ] || test_failed $LINENO +cmp aout.tar.lz empty || test_failed $LINENO +rm -f aout.tar.lz empty || framework_failure +touch aout.tar || framework_failure # append to empty file +"${TARLZ}" --un -rf aout.tar foo bar baz || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" --un -rf aout.tar || test_failed $LINENO # append nothing +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" --un -rf aout.tar -C nx_dir || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" --un -q -rf aout.tar nx_file +[ $? = 1 ] || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" --un -q -rf aout.tar aout.tar || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" --un -r foo bar baz > aout.tar || test_failed $LINENO # to stdout +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" -0 -q -rf aout.tar foo bar baz # wrong extension archive +[ $? = 2 ] || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +cat "${eoa}" > aout.tar || framework_failure # append to empty archive +"${TARLZ}" --un -rf aout.tar foo bar baz || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +"${TARLZ}" -0 -q -rf aout.tar foo bar baz # wrong extension empty archive +[ $? = 2 ] || test_failed $LINENO +cmp out.tar aout.tar || test_failed $LINENO +rm -f out.tar aout.tar || framework_failure + +# test --append to solid archive +"${TARLZ}" --solid -q -0 -cf out.tar.lz "${in}" foo bar || test_failed $LINENO +"${TARLZ}" -q -tf out.tar.lz || test_failed $LINENO # compressed seekable +cat out.tar.lz > aout.tar.lz || framework_failure +for i in --asolid --bsolid --dsolid --solid -0 ; do + "${TARLZ}" $i -q -rf out.tar.lz baz + [ $? = 2 ] || test_failed $LINENO $i + cmp out.tar.lz aout.tar.lz || test_failed $LINENO $i +done +rm -f out.tar.lz aout.tar.lz || framework_failure +for i in --asolid --bsolid --dsolid -0 ; do + for j in --asolid --bsolid --dsolid --solid -0 ; do + "${TARLZ}" $i -0 -cf out.tar.lz foo || test_failed $LINENO "$i $j" + "${TARLZ}" $j -0 -rf out.tar.lz bar baz || test_failed $LINENO "$i $j" + rm -f foo bar baz || framework_failure + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO "$i $j" + cmp cfoo foo || test_failed $LINENO "$i $j" + cmp cbar bar || test_failed $LINENO "$i $j" + cmp cbaz baz || test_failed $LINENO "$i $j" + rm -f out.tar.lz || framework_failure + done +done +rm -f foo bar baz || framework_failure + +printf "\ntesting dirs and links..." + +# test -c -d -x on directories and links +mkdir dir1 || framework_failure +"${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO +rmdir dir1 || framework_failure +"${TARLZ}" -xf out.tar.lz || test_failed $LINENO +[ -d dir1 ] || test_failed $LINENO +rmdir dir1 +rm -f out.tar.lz || framework_failure +mkdir dir1 || framework_failure +"${TARLZ}" --uncompressed -cf out.tar dir1 || test_failed $LINENO +rmdir dir1 || framework_failure +"${TARLZ}" -xf out.tar || test_failed $LINENO +[ -d dir1 ] || test_failed $LINENO +rmdir dir1 +rm -f out.tar || framework_failure + +if [ "${ln_works}" = yes ] ; then + name_100=name_100_bytes_long_nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn + path_100=dir1/dir2/dir3/path_100_bytes_long_nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn + path_106=dir1/dir2/dir3/path_longer_than_100_bytes_nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn + mkdir dir1 || framework_failure + mkdir dir1/dir2 || framework_failure + mkdir dir1/dir2/dir3 || framework_failure + cat "${in}" > dir1/dir2/dir3/in || framework_failure + ln dir1/dir2/dir3/in dir1/dir2/dir3/"${name_100}" || framework_failure + ln dir1/dir2/dir3/in "${path_100}" || framework_failure + ln dir1/dir2/dir3/in "${path_106}" || framework_failure + ln -s dir2/ dir1/dir2_link || framework_failure + ln -s in dir1/dir2/dir3/link || framework_failure + ln -s "${name_100}" dir1/dir2/dir3/link_100 || framework_failure + "${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO + "${TARLZ}" -df out.tar.lz || test_failed $LINENO + rm -rf dir1 || framework_failure + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO + "${TARLZ}" -df out.tar.lz || test_failed $LINENO + cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO + cmp "${in}" dir1/dir2_link/dir3/in || test_failed $LINENO + cmp "${in}" dir1/dir2/dir3/"${name_100}" || test_failed $LINENO + cmp "${in}" "${path_100}" || test_failed $LINENO + cmp "${in}" "${path_106}" || test_failed $LINENO + cmp "${in}" dir1/dir2/dir3/link || test_failed $LINENO + cmp "${in}" dir1/dir2/dir3/link_100 || test_failed $LINENO + rm -f dir1/dir2/dir3/in || framework_failure + cmp "${in}" dir1/dir2/dir3/link 2> /dev/null && test_failed $LINENO + cmp "${in}" dir1/dir2/dir3/link_100 || test_failed $LINENO + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO + rm -f out.tar.lz || framework_failure + cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO + cmp "${in}" dir1/dir2/dir3/link || test_failed $LINENO + "${TARLZ}" -0 -q -c ../tmp/dir1 | "${TARLZ}" -x || test_failed $LINENO + diff -ru tmp/dir1 dir1 || test_failed $LINENO + rm -rf tmp dir1 || framework_failure + # test -c -d -x on dangling (broken) symlinks with trailing slashes + "${TARLZ}" -xf "${testdir}"/ts_in_link.tar.lz || test_failed $LINENO + "${TARLZ}" -df "${testdir}"/ts_in_link.tar.lz --ignore-ids || + test_failed $LINENO + "${TARLZ}" -0 -cf out.tar.lz link1 link2 link3 link4 || test_failed $LINENO + "${TARLZ}" -df out.tar.lz || test_failed $LINENO + rm -f out.tar.lz link1 link2 link3 link4 || framework_failure +fi + +printf "\ntesting long names..." + +"${TARLZ}" -q -tf "${t155}" || test_failed $LINENO +"${TARLZ}" -q -tf "${t155_lz}" || test_failed $LINENO +if [ "${ln_works}" = yes ] ; then + mkdir dir1 || framework_failure + "${TARLZ}" -C dir1 -xf "${t155}" || test_failed $LINENO + mkdir dir2 || framework_failure + "${TARLZ}" -C dir2 -xf "${t155_lz}" || test_failed $LINENO + diff -ru dir1 dir2 || test_failed $LINENO + "${TARLZ}" -cf out.tar.lz dir2 || test_failed $LINENO + rm -rf dir2 || framework_failure + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO + diff -ru dir1 dir2 || test_failed $LINENO + rmdir dir2 2> /dev/null && test_failed $LINENO + rmdir dir1 2> /dev/null && test_failed $LINENO + rm -rf out.tar.lz dir2 dir1 || framework_failure +fi + +"${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \ + -e very_long_owner_name_of_32_chars/very_long_group_name_of_32_chars || + test_failed $LINENO +"${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \ + -e very_long_owner_name_of_32_charsvery_long_group_name_of_32_chars && + test_failed $LINENO +"${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \ + -e very_long_group_name_of_32_chars/very_long_group_name_of_32_chars && + test_failed $LINENO +"${TARLZ}" -xf "${testdir}"/ug32chars.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +rm -f foo || framework_failure +"${TARLZ}" -tvf "${testdir}"/ug32767.tar.lz | grep -q -e 32767/32767 || + test_failed $LINENO +"${TARLZ}" -xf "${testdir}"/ug32767.tar.lz || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +rm -f foo || framework_failure + +printf "\ntesting --compress..." + +cat cfoo > foo || framework_failure +cat cbar > bar || framework_failure +cat cbaz > baz || framework_failure +cat "${in}" > test.txt || framework_failure +"${TARLZ}" --un -cf out.tar test.txt foo bar baz test.txt || test_failed $LINENO +"${TARLZ}" --un -cf out3.tar foo bar baz || test_failed $LINENO +cat out.tar > outz.tar || framework_failure +cat out3.tar > out3z.tar || framework_failure +# +"${TARLZ}" -0 -z outz.tar out3z.tar || test_failed $LINENO +"${TARLZ}" -q -tf outz.tar.lz || test_failed $LINENO +"${TARLZ}" -q -tf out3z.tar.lz || test_failed $LINENO +cat outz.tar.lz > out || test_failed $LINENO +cat out3z.tar.lz > out3 || test_failed $LINENO +rm -f out3z.tar.lz || framework_failure +"${TARLZ}" -q -0 -z outz.tar out3z.tar # outz.tar.lz exists +[ $? = 1 ] || test_failed $LINENO +cmp out outz.tar.lz || test_failed $LINENO +cmp out3 out3z.tar.lz || test_failed $LINENO +if [ "${ln_works}" = yes ] ; then + ln -s outz.tar loutz.tar || framework_failure + "${TARLZ}" -0 -z loutz.tar || test_failed $LINENO + cmp loutz.tar.lz outz.tar.lz || test_failed $LINENO + rm -f loutz.tar.lz loutz.tar || framework_failure +fi +rm -f out out3 outz.tar.lz out3z.tar.lz || framework_failure +# +for i in --solid --no-solid ; do + "${TARLZ}" -0 -n0 $i -cf out.tar.lz test.txt foo bar baz test.txt || test_failed $LINENO $i + "${TARLZ}" -0 -z -o - $i out.tar | cmp out.tar.lz - || test_failed $LINENO $i + "${TARLZ}" -0 -n0 $i -cf out3.tar.lz foo bar baz || test_failed $LINENO $i + "${TARLZ}" -0 -z -o - $i out3.tar | cmp out3.tar.lz - || test_failed $LINENO $i + "${TARLZ}" -0 -z $i outz.tar out3z.tar || test_failed $LINENO $i + cmp out.tar.lz outz.tar.lz || test_failed $LINENO $i + cmp out3.tar.lz out3z.tar.lz || test_failed $LINENO $i + rm -f outz.tar.lz out3z.tar.lz || framework_failure +done +# +"${TARLZ}" -0 -B8KiB -n0 --bsolid -cf out.tar.lz test.txt foo bar baz test.txt || test_failed $LINENO +"${TARLZ}" -0 -B8KiB -z -o - --bsolid out.tar | cmp out.tar.lz - || test_failed $LINENO +"${TARLZ}" -0 -B8KiB -z -o out --bsolid out.tar || test_failed $LINENO +cmp out.tar.lz out || test_failed $LINENO +"${TARLZ}" -0 -B8KiB -z --bsolid outz.tar || test_failed $LINENO +cmp out.tar.lz outz.tar.lz || test_failed $LINENO +rm -f out outz.tar.lz || framework_failure +# +"${TARLZ}" -0 -n0 --asolid -cf out.tar.lz test.txt foo bar baz test.txt || test_failed $LINENO +"${TARLZ}" -0 -n0 --asolid -cf out3.tar.lz foo bar baz || test_failed $LINENO +for i in --asolid --bsolid --dsolid ; do + cat out.tar | "${TARLZ}" -0 -z $i | cmp out.tar.lz - || test_failed $LINENO $i + "${TARLZ}" -0 -z -o out $i out.tar || test_failed $LINENO $i + cmp out.tar.lz out || test_failed $LINENO $i + "${TARLZ}" -0 -z $i outz.tar out3z.tar || test_failed $LINENO $i + cmp out.tar.lz outz.tar.lz || test_failed $LINENO $i + cmp out3.tar.lz out3z.tar.lz || test_failed $LINENO $i + rm -f out outz.tar.lz out3z.tar.lz || framework_failure +done +# concatenate and compress +"${TARLZ}" --un -cf foo.tar foo || test_failed $LINENO +"${TARLZ}" --un -cf bar.tar bar || test_failed $LINENO +"${TARLZ}" --un -cf baz.tar baz || test_failed $LINENO +"${TARLZ}" -A foo.tar bar.tar baz.tar | "${TARLZ}" -0 -z -o foobarbaz.tar.lz || + test_failed $LINENO +cmp out3.tar.lz foobarbaz.tar.lz || test_failed $LINENO +# compress and concatenate +"${TARLZ}" -0 -z foo.tar bar.tar baz.tar || test_failed $LINENO +"${TARLZ}" -A foo.tar.lz bar.tar.lz baz.tar.lz > foobarbaz.tar.lz || + test_failed $LINENO +"${TARLZ}" -0 -n0 --no-solid -c foo bar baz | cmp foobarbaz.tar.lz - || + test_failed $LINENO +rm -f foo bar baz test.txt out.tar.lz out.tar outz.tar foobarbaz.tar.lz \ + out3.tar out3.tar.lz out3z.tar foo.tar bar.tar baz.tar \ + foo.tar.lz bar.tar.lz baz.tar.lz || framework_failure + +printf "\ntesting bad input..." + +# test --extract ".." +mkdir dir1 || framework_failure +cd dir1 || framework_failure +for i in 0 2 ; do # try serial and parallel decoders + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot1.tar.lz || test_failed $LINENO $i + [ ! -e ../dir ] || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot2.tar.lz || test_failed $LINENO $i + [ ! -e ../dir ] || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot3.tar.lz || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot4.tar.lz || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i + "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot5.tar.lz || test_failed $LINENO $i + [ ! -e dir ] || test_failed $LINENO $i +done +cd .. || framework_failure +rm -rf dir1 || framework_failure + +# test --list and --extract truncated tar +dd if="${in_tar}" of=truncated.tar bs=1000 count=1 2> /dev/null +"${TARLZ}" -q -tf truncated.tar > /dev/null +[ $? = 2 ] || test_failed $LINENO +"${TARLZ}" -q -xf truncated.tar +[ $? = 2 ] || test_failed $LINENO +[ ! -e test.txt ] || test_failed $LINENO +rm -f truncated.tar || framework_failure + +# test --delete with split 'bar' tar member +for i in 1 2 3 4 ; do + cat "${testdir}"/test3_sm${i}.tar.lz > out.tar.lz || framework_failure + for j in bar baz ; do + "${TARLZ}" -q -f out.tar.lz --delete $j + [ $? = 2 ] || test_failed $LINENO "$i $j" + done + cmp "${testdir}"/test3_sm${i}.tar.lz out.tar.lz || test_failed $LINENO $i + "${TARLZ}" -q -f out.tar.lz --delete foo + [ $? = 2 ] || test_failed $LINENO $i + "${TARLZ}" -xf out.tar.lz || test_failed $LINENO $i + [ ! -e foo ] || test_failed $LINENO $i + cmp cbar bar || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f out.tar.lz foo bar baz || framework_failure +done + +# test --list and --extract format violations +if [ "${ln_works}" = yes ] ; then + mkdir dir1 || framework_failure + "${TARLZ}" -C dir1 -xf "${t155}" || test_failed $LINENO +fi +for i in 1 2 3 ; do + "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar + [ $? = 2 ] || test_failed $LINENO $i + "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar --permissive || + test_failed $LINENO $i + if [ "${ln_works}" = yes ] ; then + mkdir dir2 || framework_failure + "${TARLZ}" -C dir2 -xf "${testdir}"/t155_fv${i}.tar --permissive || + test_failed $LINENO $i + diff -ru dir1 dir2 || test_failed $LINENO $i + rm -rf dir2 || framework_failure + fi +done +for i in 1 2 3 4 5 6 ; do + "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar.lz + [ $? = 2 ] || test_failed $LINENO $i + "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar.lz --permissive || + test_failed $LINENO $i + if [ "${ln_works}" = yes ] ; then + mkdir dir2 || framework_failure + "${TARLZ}" -n4 -C dir2 -xf "${testdir}"/t155_fv${i}.tar.lz --permissive || + test_failed $LINENO $i + diff -ru dir1 dir2 || test_failed $LINENO $i + rm -rf dir2 || framework_failure + fi +done +if [ "${ln_works}" = yes ] ; then rm -rf dir1 || framework_failure ; fi + +for i in "${testdir}"/test3_nn.tar "${testdir}"/test3_nn.tar.lz ; do + "${TARLZ}" -q -n0 -tf "$i" || test_failed $LINENO $i + "${TARLZ}" -q -n4 -tf "$i" || test_failed $LINENO $i + "${TARLZ}" -q -n0 -xf "$i" || test_failed $LINENO $i + "${TARLZ}" -n0 -df "$i" --ignore-ids || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + [ ! -e bar ] || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure + "${TARLZ}" -q -n4 -xf "$i" || test_failed $LINENO $i + "${TARLZ}" -n4 -df "$i" --ignore-ids || test_failed $LINENO $i + cmp cfoo foo || test_failed $LINENO $i + [ ! -e bar ] || test_failed $LINENO $i + cmp cbaz baz || test_failed $LINENO $i + rm -f foo bar baz || framework_failure +done + +printf "\ntesting --keep-damaged..." + +# test --extract and --keep-damaged compressed +rm -f test.txt || framework_failure +for i in "${inbad1}" "${inbad2}" ; do + "${TARLZ}" -q -xf "${i}.tar.lz" + [ $? = 2 ] || test_failed $LINENO "$i" + [ ! -e test.txt ] || test_failed $LINENO "$i" + rm -f test.txt || framework_failure + "${TARLZ}" -q -n0 -xf "${i}.tar.lz" --keep-damaged + [ $? = 2 ] || test_failed $LINENO "$i" + [ -e test.txt ] || test_failed $LINENO "$i" + cmp "$i" test.txt 2> /dev/null || test_failed $LINENO $i + rm -f test.txt || framework_failure +done +# +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad1_lz}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad2_lz}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad3_lz}" +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad3_lz}" --keep-damaged +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar 2> /dev/null || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad4_lz}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad4_lz}" --keep-damaged +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +cmp cbar bar 2> /dev/null || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad5_lz}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad5_lz}" --keep-damaged +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo 2> /dev/null || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -n0 -xf "${bad6_lz}" +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO + +# test --extract and --keep-damaged uncompressed +rm -f test.txt || framework_failure +"${TARLZ}" -q -xf "${inbad1}.tar" +[ $? = 2 ] || test_failed $LINENO +[ ! -e test.txt ] || test_failed $LINENO +rm -f test.txt || framework_failure +"${TARLZ}" -q -xf "${inbad1}.tar" --keep-damaged +[ $? = 2 ] || test_failed $LINENO +[ -e test.txt ] || test_failed $LINENO +cmp "${inbad1}" test.txt 2> /dev/null || test_failed $LINENO +rm -f test.txt || framework_failure +# +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf "${bad1}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf "${bad2}" +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf "${bad3}" +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf "${bad4}" +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +rm -f foo bar baz || framework_failure +"${TARLZ}" -q -xf "${bad5}" +[ $? = 2 ] || test_failed $LINENO +cmp cfoo foo || test_failed $LINENO +cmp cbar bar || test_failed $LINENO +cmp cbaz baz || test_failed $LINENO +rm -f cfoo cbar cbaz foo bar baz || framework_failure +# +rm -f test3.tar.lz || framework_failure +"${TARLZ}" -q -xf "${tlzit1}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +[ ! -e test3.tar.lz ] || test_failed $LINENO +rm -f foo bar baz test3.tar.lz || framework_failure +"${TARLZ}" -q -xf "${tlzit2}" +[ $? = 2 ] || test_failed $LINENO +[ ! -e foo ] || test_failed $LINENO +[ ! -e bar ] || test_failed $LINENO +[ ! -e baz ] || test_failed $LINENO +cmp "${test3_lz}" test3.tar.lz || test_failed $LINENO +rm -f foo bar baz test3.tar.lz || framework_failure + +echo +if [ ${fail} = 0 ] ; then + echo "tests completed successfully." + cd "${objdir}" && rm -r tmp +else + echo "tests failed." +fi +exit ${fail} diff --git a/testsuite/dotdot1.tar.lz b/testsuite/dotdot1.tar.lz new file mode 100644 index 0000000..9884d9f Binary files /dev/null and b/testsuite/dotdot1.tar.lz differ diff --git a/testsuite/dotdot2.tar.lz b/testsuite/dotdot2.tar.lz new file mode 100644 index 0000000..a60b898 Binary files /dev/null and b/testsuite/dotdot2.tar.lz differ diff --git a/testsuite/dotdot3.tar.lz b/testsuite/dotdot3.tar.lz new file mode 100644 index 0000000..163fb5c Binary files /dev/null and b/testsuite/dotdot3.tar.lz differ diff --git a/testsuite/dotdot4.tar.lz b/testsuite/dotdot4.tar.lz new file mode 100644 index 0000000..8c6a0ee Binary files /dev/null and b/testsuite/dotdot4.tar.lz differ diff --git a/testsuite/dotdot5.tar.lz b/testsuite/dotdot5.tar.lz new file mode 100644 index 0000000..a62cd18 Binary files /dev/null and b/testsuite/dotdot5.tar.lz differ diff --git a/testsuite/eoa_blocks.tar b/testsuite/eoa_blocks.tar new file mode 100644 index 0000000..06d7405 Binary files /dev/null and b/testsuite/eoa_blocks.tar differ diff --git a/testsuite/eoa_blocks.tar.lz b/testsuite/eoa_blocks.tar.lz new file mode 100644 index 0000000..328273c Binary files /dev/null and b/testsuite/eoa_blocks.tar.lz differ diff --git a/testsuite/rbar b/testsuite/rbar new file mode 100644 index 0000000..5716ca5 --- /dev/null +++ b/testsuite/rbar @@ -0,0 +1 @@ +bar diff --git a/testsuite/rbaz b/testsuite/rbaz new file mode 100644 index 0000000..7601807 --- /dev/null +++ b/testsuite/rbaz @@ -0,0 +1 @@ +baz diff --git a/testsuite/rfoo b/testsuite/rfoo new file mode 100644 index 0000000..257cc56 --- /dev/null +++ b/testsuite/rfoo @@ -0,0 +1 @@ +foo diff --git a/testsuite/t155.tar b/testsuite/t155.tar new file mode 100644 index 0000000..f2b8a4e Binary files /dev/null and b/testsuite/t155.tar differ diff --git a/testsuite/t155.tar.lz b/testsuite/t155.tar.lz new file mode 100644 index 0000000..edc7f04 Binary files /dev/null and b/testsuite/t155.tar.lz differ diff --git a/testsuite/t155_fv1.tar b/testsuite/t155_fv1.tar new file mode 100644 index 0000000..1ef64c3 Binary files /dev/null and b/testsuite/t155_fv1.tar differ diff --git a/testsuite/t155_fv1.tar.lz b/testsuite/t155_fv1.tar.lz new file mode 100644 index 0000000..896925e Binary files /dev/null and b/testsuite/t155_fv1.tar.lz differ diff --git a/testsuite/t155_fv2.tar b/testsuite/t155_fv2.tar new file mode 100644 index 0000000..f732b30 Binary files /dev/null and b/testsuite/t155_fv2.tar differ diff --git a/testsuite/t155_fv2.tar.lz b/testsuite/t155_fv2.tar.lz new file mode 100644 index 0000000..b380105 Binary files /dev/null and b/testsuite/t155_fv2.tar.lz differ diff --git a/testsuite/t155_fv3.tar b/testsuite/t155_fv3.tar new file mode 100644 index 0000000..fe5db13 Binary files /dev/null and b/testsuite/t155_fv3.tar differ diff --git a/testsuite/t155_fv3.tar.lz b/testsuite/t155_fv3.tar.lz new file mode 100644 index 0000000..aa24c0a Binary files /dev/null and b/testsuite/t155_fv3.tar.lz differ diff --git a/testsuite/t155_fv4.tar.lz b/testsuite/t155_fv4.tar.lz new file mode 100644 index 0000000..e3ae9c3 Binary files /dev/null and b/testsuite/t155_fv4.tar.lz differ diff --git a/testsuite/t155_fv5.tar.lz b/testsuite/t155_fv5.tar.lz new file mode 100644 index 0000000..966015a Binary files /dev/null and b/testsuite/t155_fv5.tar.lz differ diff --git a/testsuite/t155_fv6.tar.lz b/testsuite/t155_fv6.tar.lz new file mode 100644 index 0000000..bc83237 Binary files /dev/null and b/testsuite/t155_fv6.tar.lz differ diff --git a/testsuite/tar_in_tlz1.tar.lz b/testsuite/tar_in_tlz1.tar.lz new file mode 100644 index 0000000..bf04f25 Binary files /dev/null and b/testsuite/tar_in_tlz1.tar.lz differ diff --git a/testsuite/tar_in_tlz2.tar.lz b/testsuite/tar_in_tlz2.tar.lz new file mode 100644 index 0000000..de8453b Binary files /dev/null and b/testsuite/tar_in_tlz2.tar.lz differ diff --git a/testsuite/test.txt b/testsuite/test.txt new file mode 100644 index 0000000..9196a3a --- /dev/null +++ b/testsuite/test.txt @@ -0,0 +1,676 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz new file mode 100644 index 0000000..46f98a7 Binary files /dev/null and b/testsuite/test.txt.lz differ diff --git a/testsuite/test.txt.tar b/testsuite/test.txt.tar new file mode 100644 index 0000000..d687b43 Binary files /dev/null and b/testsuite/test.txt.tar differ diff --git a/testsuite/test.txt.tar.lz b/testsuite/test.txt.tar.lz new file mode 100644 index 0000000..306eeeb Binary files /dev/null and b/testsuite/test.txt.tar.lz differ diff --git a/testsuite/test3.tar b/testsuite/test3.tar new file mode 100644 index 0000000..d58fb45 Binary files /dev/null and b/testsuite/test3.tar differ diff --git a/testsuite/test3.tar.lz b/testsuite/test3.tar.lz new file mode 100644 index 0000000..779ace4 Binary files /dev/null and b/testsuite/test3.tar.lz differ diff --git a/testsuite/test3_bad1.tar b/testsuite/test3_bad1.tar new file mode 100644 index 0000000..005b6a3 Binary files /dev/null and b/testsuite/test3_bad1.tar differ diff --git a/testsuite/test3_bad1.tar.lz b/testsuite/test3_bad1.tar.lz new file mode 100644 index 0000000..9f5d40f Binary files /dev/null and b/testsuite/test3_bad1.tar.lz differ diff --git a/testsuite/test3_bad2.tar b/testsuite/test3_bad2.tar new file mode 100644 index 0000000..d2c546b Binary files /dev/null and b/testsuite/test3_bad2.tar differ diff --git a/testsuite/test3_bad2.tar.lz b/testsuite/test3_bad2.tar.lz new file mode 100644 index 0000000..182c048 Binary files /dev/null and b/testsuite/test3_bad2.tar.lz differ diff --git a/testsuite/test3_bad3.tar b/testsuite/test3_bad3.tar new file mode 100644 index 0000000..7d78e87 Binary files /dev/null and b/testsuite/test3_bad3.tar differ diff --git a/testsuite/test3_bad3.tar.lz b/testsuite/test3_bad3.tar.lz new file mode 100644 index 0000000..3b46163 Binary files /dev/null and b/testsuite/test3_bad3.tar.lz differ diff --git a/testsuite/test3_bad4.tar b/testsuite/test3_bad4.tar new file mode 100644 index 0000000..68312b3 Binary files /dev/null and b/testsuite/test3_bad4.tar differ diff --git a/testsuite/test3_bad4.tar.lz b/testsuite/test3_bad4.tar.lz new file mode 100644 index 0000000..7ac6d98 Binary files /dev/null and b/testsuite/test3_bad4.tar.lz differ diff --git a/testsuite/test3_bad5.tar b/testsuite/test3_bad5.tar new file mode 100644 index 0000000..e482969 Binary files /dev/null and b/testsuite/test3_bad5.tar differ diff --git a/testsuite/test3_bad5.tar.lz b/testsuite/test3_bad5.tar.lz new file mode 100644 index 0000000..5b4feb3 Binary files /dev/null and b/testsuite/test3_bad5.tar.lz differ diff --git a/testsuite/test3_bad6.tar.lz b/testsuite/test3_bad6.tar.lz new file mode 100644 index 0000000..42b3888 Binary files /dev/null and b/testsuite/test3_bad6.tar.lz differ diff --git a/testsuite/test3_dir.tar b/testsuite/test3_dir.tar new file mode 100644 index 0000000..e0c2b29 Binary files /dev/null and b/testsuite/test3_dir.tar differ diff --git a/testsuite/test3_dir.tar.lz b/testsuite/test3_dir.tar.lz new file mode 100644 index 0000000..8eb3f43 Binary files /dev/null and b/testsuite/test3_dir.tar.lz differ diff --git a/testsuite/test3_dot.tar.lz b/testsuite/test3_dot.tar.lz new file mode 100644 index 0000000..8fd3d1f Binary files /dev/null and b/testsuite/test3_dot.tar.lz differ diff --git a/testsuite/test3_em1.tar.lz b/testsuite/test3_em1.tar.lz new file mode 100644 index 0000000..0aa8724 Binary files /dev/null and b/testsuite/test3_em1.tar.lz differ diff --git a/testsuite/test3_em2.tar.lz b/testsuite/test3_em2.tar.lz new file mode 100644 index 0000000..4fe4e5d Binary files /dev/null and b/testsuite/test3_em2.tar.lz differ diff --git a/testsuite/test3_em3.tar.lz b/testsuite/test3_em3.tar.lz new file mode 100644 index 0000000..49e2eab Binary files /dev/null and b/testsuite/test3_em3.tar.lz differ diff --git a/testsuite/test3_em4.tar.lz b/testsuite/test3_em4.tar.lz new file mode 100644 index 0000000..95df508 Binary files /dev/null and b/testsuite/test3_em4.tar.lz differ diff --git a/testsuite/test3_em5.tar.lz b/testsuite/test3_em5.tar.lz new file mode 100644 index 0000000..706beb5 Binary files /dev/null and b/testsuite/test3_em5.tar.lz differ diff --git a/testsuite/test3_em6.tar.lz b/testsuite/test3_em6.tar.lz new file mode 100644 index 0000000..806884d Binary files /dev/null and b/testsuite/test3_em6.tar.lz differ diff --git a/testsuite/test3_eoa1.tar b/testsuite/test3_eoa1.tar new file mode 100644 index 0000000..175b807 Binary files /dev/null and b/testsuite/test3_eoa1.tar differ diff --git a/testsuite/test3_eoa1.tar.lz b/testsuite/test3_eoa1.tar.lz new file mode 100644 index 0000000..0eb86e4 Binary files /dev/null and b/testsuite/test3_eoa1.tar.lz differ diff --git a/testsuite/test3_eoa2.tar b/testsuite/test3_eoa2.tar new file mode 100644 index 0000000..458be1e Binary files /dev/null and b/testsuite/test3_eoa2.tar differ diff --git a/testsuite/test3_eoa2.tar.lz b/testsuite/test3_eoa2.tar.lz new file mode 100644 index 0000000..1f47953 Binary files /dev/null and b/testsuite/test3_eoa2.tar.lz differ diff --git a/testsuite/test3_eoa3.tar b/testsuite/test3_eoa3.tar new file mode 100644 index 0000000..3003a93 Binary files /dev/null and b/testsuite/test3_eoa3.tar differ diff --git a/testsuite/test3_eoa3.tar.lz b/testsuite/test3_eoa3.tar.lz new file mode 100644 index 0000000..20ba9f8 Binary files /dev/null and b/testsuite/test3_eoa3.tar.lz differ diff --git a/testsuite/test3_eoa4.tar b/testsuite/test3_eoa4.tar new file mode 100644 index 0000000..4012fea Binary files /dev/null and b/testsuite/test3_eoa4.tar differ diff --git a/testsuite/test3_eoa4.tar.lz b/testsuite/test3_eoa4.tar.lz new file mode 100644 index 0000000..1593feb Binary files /dev/null and b/testsuite/test3_eoa4.tar.lz differ diff --git a/testsuite/test3_eoa5.tar.lz b/testsuite/test3_eoa5.tar.lz new file mode 100644 index 0000000..156bd3a Binary files /dev/null and b/testsuite/test3_eoa5.tar.lz differ diff --git a/testsuite/test3_gh1.tar b/testsuite/test3_gh1.tar new file mode 100644 index 0000000..f969561 Binary files /dev/null and b/testsuite/test3_gh1.tar differ diff --git a/testsuite/test3_gh1.tar.lz b/testsuite/test3_gh1.tar.lz new file mode 100644 index 0000000..d38f46b Binary files /dev/null and b/testsuite/test3_gh1.tar.lz differ diff --git a/testsuite/test3_gh2.tar b/testsuite/test3_gh2.tar new file mode 100644 index 0000000..f5f0c31 Binary files /dev/null and b/testsuite/test3_gh2.tar differ diff --git a/testsuite/test3_gh2.tar.lz b/testsuite/test3_gh2.tar.lz new file mode 100644 index 0000000..48f18dd Binary files /dev/null and b/testsuite/test3_gh2.tar.lz differ diff --git a/testsuite/test3_gh3.tar b/testsuite/test3_gh3.tar new file mode 100644 index 0000000..e0d3a9d Binary files /dev/null and b/testsuite/test3_gh3.tar differ diff --git a/testsuite/test3_gh3.tar.lz b/testsuite/test3_gh3.tar.lz new file mode 100644 index 0000000..89a31a6 Binary files /dev/null and b/testsuite/test3_gh3.tar.lz differ diff --git a/testsuite/test3_gh4.tar b/testsuite/test3_gh4.tar new file mode 100644 index 0000000..0655c31 Binary files /dev/null and b/testsuite/test3_gh4.tar differ diff --git a/testsuite/test3_gh4.tar.lz b/testsuite/test3_gh4.tar.lz new file mode 100644 index 0000000..5b9f605 Binary files /dev/null and b/testsuite/test3_gh4.tar.lz differ diff --git a/testsuite/test3_gh5.tar.lz b/testsuite/test3_gh5.tar.lz new file mode 100644 index 0000000..b8f4abe Binary files /dev/null and b/testsuite/test3_gh5.tar.lz differ diff --git a/testsuite/test3_gh6.tar.lz b/testsuite/test3_gh6.tar.lz new file mode 100644 index 0000000..7be9aca Binary files /dev/null and b/testsuite/test3_gh6.tar.lz differ diff --git a/testsuite/test3_nn.tar b/testsuite/test3_nn.tar new file mode 100644 index 0000000..c738dee Binary files /dev/null and b/testsuite/test3_nn.tar differ diff --git a/testsuite/test3_nn.tar.lz b/testsuite/test3_nn.tar.lz new file mode 100644 index 0000000..8f78c1b Binary files /dev/null and b/testsuite/test3_nn.tar.lz differ diff --git a/testsuite/test3_sm1.tar.lz b/testsuite/test3_sm1.tar.lz new file mode 100644 index 0000000..6eb3947 Binary files /dev/null and b/testsuite/test3_sm1.tar.lz differ diff --git a/testsuite/test3_sm2.tar.lz b/testsuite/test3_sm2.tar.lz new file mode 100644 index 0000000..f312fcb Binary files /dev/null and b/testsuite/test3_sm2.tar.lz differ diff --git a/testsuite/test3_sm3.tar.lz b/testsuite/test3_sm3.tar.lz new file mode 100644 index 0000000..82ceb18 Binary files /dev/null and b/testsuite/test3_sm3.tar.lz differ diff --git a/testsuite/test3_sm4.tar.lz b/testsuite/test3_sm4.tar.lz new file mode 100644 index 0000000..601a640 Binary files /dev/null and b/testsuite/test3_sm4.tar.lz differ diff --git a/testsuite/test_bad1.txt b/testsuite/test_bad1.txt new file mode 100644 index 0000000..f8463f4 --- /dev/null +++ b/testsuite/test_bad1.txt @@ -0,0 +1,307 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program \ No newline at end of file diff --git a/testsuite/test_bad1.txt.tar b/testsuite/test_bad1.txt.tar new file mode 100644 index 0000000..dc2140e Binary files /dev/null and b/testsuite/test_bad1.txt.tar differ diff --git a/testsuite/test_bad1.txt.tar.lz b/testsuite/test_bad1.txt.tar.lz new file mode 100644 index 0000000..afb1e85 Binary files /dev/null and b/testsuite/test_bad1.txt.tar.lz differ diff --git a/testsuite/test_bad2.txt b/testsuite/test_bad2.txt new file mode 100644 index 0000000..452408f --- /dev/null +++ b/testsuite/test_bad2.txt @@ -0,0 +1,320 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. Ifodifnperived from the Progr"work based on therogrdifneneraeuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT X FR TO NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT X FR TO NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARQIS NO WARRATHERE IS NO WARRANTY +FOR THE + + This ee \ No newline at end of file diff --git a/testsuite/test_bad2.txt.tar.lz b/testsuite/test_bad2.txt.tar.lz new file mode 100644 index 0000000..598e121 Binary files /dev/null and b/testsuite/test_bad2.txt.tar.lz differ diff --git a/testsuite/tlz_in_tar1.tar b/testsuite/tlz_in_tar1.tar new file mode 100644 index 0000000..f2dfd6c Binary files /dev/null and b/testsuite/tlz_in_tar1.tar differ diff --git a/testsuite/tlz_in_tar2.tar b/testsuite/tlz_in_tar2.tar new file mode 100644 index 0000000..be860c6 Binary files /dev/null and b/testsuite/tlz_in_tar2.tar differ diff --git a/testsuite/ts_in_link.tar.lz b/testsuite/ts_in_link.tar.lz new file mode 100644 index 0000000..dff816c Binary files /dev/null and b/testsuite/ts_in_link.tar.lz differ diff --git a/testsuite/ug32767.tar.lz b/testsuite/ug32767.tar.lz new file mode 100644 index 0000000..499dc6d Binary files /dev/null and b/testsuite/ug32767.tar.lz differ diff --git a/testsuite/ug32chars.tar.lz b/testsuite/ug32chars.tar.lz new file mode 100644 index 0000000..6c4da26 Binary files /dev/null and b/testsuite/ug32chars.tar.lz differ -- cgit v1.2.3