summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-14 12:57:29 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-14 12:57:29 +0000
commit29146f385a524ad6a4b1b127cc3d9641a8fe0adc (patch)
tree1caea11496a3d9e0333cdf649d9f9be6d5a67b78
parentInitial commit. (diff)
downloadtarlz-29146f385a524ad6a4b1b127cc3d9641a8fe0adc.tar.xz
tarlz-29146f385a524ad6a4b1b127cc3d9641a8fe0adc.zip
Adding upstream version 0.25.upstream/0.25upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
-rw-r--r--AUTHORS1
-rw-r--r--COPYING338
-rw-r--r--ChangeLog236
-rw-r--r--INSTALL80
-rw-r--r--Makefile.in179
-rw-r--r--NEWS14
-rw-r--r--README96
-rw-r--r--archive_reader.cc273
-rw-r--r--archive_reader.h120
-rw-r--r--arg_parser.cc197
-rw-r--r--arg_parser.h110
-rw-r--r--common.cc72
-rw-r--r--common_decode.cc255
-rw-r--r--common_mutex.cc160
-rw-r--r--common_mutex.h30
-rw-r--r--compress.cc392
-rwxr-xr-xconfigure205
-rw-r--r--create.cc740
-rw-r--r--create.h47
-rw-r--r--create_lz.cc594
-rw-r--r--decode.cc533
-rw-r--r--decode.h35
-rw-r--r--decode_lz.cc765
-rw-r--r--delete.cc189
-rw-r--r--delete_lz.cc138
-rw-r--r--doc/tarlz.1180
-rw-r--r--doc/tarlz.info1287
-rw-r--r--doc/tarlz.texi1356
-rw-r--r--exclude.cc53
-rw-r--r--extended.cc422
-rw-r--r--lzip_index.cc210
-rw-r--r--lzip_index.h93
-rw-r--r--main.cc720
-rw-r--r--tarlz.h608
-rwxr-xr-xtestsuite/check.sh1481
-rw-r--r--testsuite/dotdot1.tar.lzbin0 -> 139 bytes
-rw-r--r--testsuite/dotdot2.tar.lzbin0 -> 140 bytes
-rw-r--r--testsuite/dotdot3.tar.lzbin0 -> 141 bytes
-rw-r--r--testsuite/dotdot4.tar.lzbin0 -> 140 bytes
-rw-r--r--testsuite/dotdot5.tar.lzbin0 -> 139 bytes
-rw-r--r--testsuite/eoa_blocks.tarbin0 -> 1024 bytes
-rw-r--r--testsuite/eoa_blocks.tar.lzbin0 -> 44 bytes
-rw-r--r--testsuite/rbar1
-rw-r--r--testsuite/rbaz1
-rw-r--r--testsuite/rfoo1
-rw-r--r--testsuite/t155.tarbin0 -> 9216 bytes
-rw-r--r--testsuite/t155.tar.lzbin0 -> 906 bytes
-rw-r--r--testsuite/t155_fv1.tarbin0 -> 10240 bytes
-rw-r--r--testsuite/t155_fv1.tar.lzbin0 -> 914 bytes
-rw-r--r--testsuite/t155_fv2.tarbin0 -> 10240 bytes
-rw-r--r--testsuite/t155_fv2.tar.lzbin0 -> 1042 bytes
-rw-r--r--testsuite/t155_fv3.tarbin0 -> 10240 bytes
-rw-r--r--testsuite/t155_fv3.tar.lzbin0 -> 915 bytes
-rw-r--r--testsuite/t155_fv4.tar.lzbin0 -> 1031 bytes
-rw-r--r--testsuite/t155_fv5.tar.lzbin0 -> 1173 bytes
-rw-r--r--testsuite/t155_fv6.tar.lzbin0 -> 1031 bytes
-rw-r--r--testsuite/tar_in_tlz1.tar.lzbin0 -> 7680 bytes
-rw-r--r--testsuite/tar_in_tlz2.tar.lzbin0 -> 7807 bytes
-rw-r--r--testsuite/test.txt676
-rw-r--r--testsuite/test.txt.lzbin0 -> 7392 bytes
-rw-r--r--testsuite/test.txt.tarbin0 -> 38400 bytes
-rw-r--r--testsuite/test.txt.tar.lzbin0 -> 7495 bytes
-rw-r--r--testsuite/test3.tarbin0 -> 4096 bytes
-rw-r--r--testsuite/test3.tar.lzbin0 -> 356 bytes
-rw-r--r--testsuite/test3_bad1.tarbin0 -> 4096 bytes
-rw-r--r--testsuite/test3_bad1.tar.lzbin0 -> 356 bytes
-rw-r--r--testsuite/test3_bad2.tarbin0 -> 4096 bytes
-rw-r--r--testsuite/test3_bad2.tar.lzbin0 -> 356 bytes
-rw-r--r--testsuite/test3_bad3.tarbin0 -> 4096 bytes
-rw-r--r--testsuite/test3_bad3.tar.lzbin0 -> 356 bytes
-rw-r--r--testsuite/test3_bad4.tarbin0 -> 4096 bytes
-rw-r--r--testsuite/test3_bad4.tar.lzbin0 -> 356 bytes
-rw-r--r--testsuite/test3_bad5.tarbin0 -> 4608 bytes
-rw-r--r--testsuite/test3_bad5.tar.lzbin0 -> 356 bytes
-rw-r--r--testsuite/test3_bad6.tar.lzbin0 -> 866 bytes
-rw-r--r--testsuite/test3_dir.tarbin0 -> 4096 bytes
-rw-r--r--testsuite/test3_dir.tar.lzbin0 -> 358 bytes
-rw-r--r--testsuite/test3_dot.tar.lzbin0 -> 1126 bytes
-rw-r--r--testsuite/test3_em1.tar.lzbin0 -> 392 bytes
-rw-r--r--testsuite/test3_em2.tar.lzbin0 -> 392 bytes
-rw-r--r--testsuite/test3_em3.tar.lzbin0 -> 392 bytes
-rw-r--r--testsuite/test3_em4.tar.lzbin0 -> 392 bytes
-rw-r--r--testsuite/test3_em5.tar.lzbin0 -> 392 bytes
-rw-r--r--testsuite/test3_em6.tar.lzbin0 -> 500 bytes
-rw-r--r--testsuite/test3_eoa1.tarbin0 -> 3072 bytes
-rw-r--r--testsuite/test3_eoa1.tar.lzbin0 -> 312 bytes
-rw-r--r--testsuite/test3_eoa2.tarbin0 -> 3584 bytes
-rw-r--r--testsuite/test3_eoa2.tar.lzbin0 -> 352 bytes
-rw-r--r--testsuite/test3_eoa3.tarbin0 -> 4608 bytes
-rw-r--r--testsuite/test3_eoa3.tar.lzbin0 -> 396 bytes
-rw-r--r--testsuite/test3_eoa4.tarbin0 -> 4096 bytes
-rw-r--r--testsuite/test3_eoa4.tar.lzbin0 -> 535 bytes
-rw-r--r--testsuite/test3_eoa5.tar.lzbin0 -> 535 bytes
-rw-r--r--testsuite/test3_gh1.tarbin0 -> 5120 bytes
-rw-r--r--testsuite/test3_gh1.tar.lzbin0 -> 574 bytes
-rw-r--r--testsuite/test3_gh2.tarbin0 -> 5120 bytes
-rw-r--r--testsuite/test3_gh2.tar.lzbin0 -> 607 bytes
-rw-r--r--testsuite/test3_gh3.tarbin0 -> 5120 bytes
-rw-r--r--testsuite/test3_gh3.tar.lzbin0 -> 645 bytes
-rw-r--r--testsuite/test3_gh4.tarbin0 -> 5120 bytes
-rw-r--r--testsuite/test3_gh4.tar.lzbin0 -> 795 bytes
-rw-r--r--testsuite/test3_gh5.tar.lzbin0 -> 574 bytes
-rw-r--r--testsuite/test3_gh6.tar.lzbin0 -> 521 bytes
-rw-r--r--testsuite/test3_nn.tarbin0 -> 4096 bytes
-rw-r--r--testsuite/test3_nn.tar.lzbin0 -> 350 bytes
-rw-r--r--testsuite/test3_sm1.tar.lzbin0 -> 579 bytes
-rw-r--r--testsuite/test3_sm2.tar.lzbin0 -> 612 bytes
-rw-r--r--testsuite/test3_sm3.tar.lzbin0 -> 650 bytes
-rw-r--r--testsuite/test3_sm4.tar.lzbin0 -> 798 bytes
-rw-r--r--testsuite/test_bad1.txt307
-rw-r--r--testsuite/test_bad1.txt.tarbin0 -> 17014 bytes
-rw-r--r--testsuite/test_bad1.txt.tar.lzbin0 -> 6000 bytes
-rw-r--r--testsuite/test_bad2.txt320
-rw-r--r--testsuite/test_bad2.txt.tar.lzbin0 -> 7495 bytes
-rw-r--r--testsuite/tlz_in_tar1.tarbin0 -> 2048 bytes
-rw-r--r--testsuite/tlz_in_tar2.tarbin0 -> 3072 bytes
-rw-r--r--testsuite/ts_in_link.tar.lzbin0 -> 509 bytes
-rw-r--r--testsuite/ug32767.tar.lzbin0 -> 136 bytes
-rw-r--r--testsuite/ug32chars.tar.lzbin0 -> 176 bytes
119 files changed, 13515 insertions, 0 deletions
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..a8fcb34
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1 @@
+Tarlz was written by Antonio Diaz Diaz.
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..4ad17ae
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,338 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) <year> <name of author>
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..77358bd
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,236 @@
+2024-01-03 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.25 released.
+ * New option '--ignore-metadata'.
+ * create.cc, decode.cc, decode_lz.cc:
+ '#include <sys/types.h>' for major, minor, makedev on BSD systems.
+ * compress.cc: Reformat file diagnostics as 'PROGRAM: FILE: MESSAGE'.
+ (compress_archive): Create missing intermediate directories.
+ * configure, Makefile.in: New variable 'MAKEINFO'.
+
+2023-09-20 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.24 released.
+ * decode.cc (decode), common_decode.cc (check_skip_filename):
+ Make option '-C' position-dependent also for diff and extract.
+ (Reported by Devon Sean McCullough).
+ * create.cc (encode): Deduce '--uncompressed' from archive name ext.
+ * compress.cc (show_atpos_error): New function showing errno msg.
+ (compress_archive): Exit with error status 2 if archive is empty.
+ * Limit the size of a header set (extended+ustar) to INT_MAX.
+ * check.sh: Fix '--diff' test on OS/2 again. (Reported by Elbert Pol).
+
+2022-09-23 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.23 released.
+ * Create and decode the extended records 'atime' and 'mtime'.
+ * Create and decode the extended records 'uid' and 'gid'.
+ * New option '--ignore-overflow'.
+ * Refuse to read/write archive data from/to a terminal.
+ (Reported by DustDFG).
+ * main.cc (parse_mtime): Make time of day 'HH:MM:SS' optional.
+ Accept both space and 'T' as separator between date and time.
+ (show_option_error): New function showing argument and option name.
+ * decode.cc (extract_member): Diagnose intermediate directory failure.
+ Failure to extract a member is no longer fatal.
+ * decode_lz.cc: Make diagnostics identical to serial decoder.
+ * common_decode.cc (format_member_name): Improve column alignment.
+ * create.cc (fill_headers): Improve diagnostic when stat reports a
+ wrong st_size for a symbolic link. (Reported by Jason Lenz).
+ Change diagnostic "File is the archive" to "Archive can't contain
+ itself" following a similar change made by Paul Eggert to GNU tar.
+ * Don't show "Removing leading '/' from member names." if excluded.
+ * tarlz.texi: Change GNU Texinfo category from 'Data Compression'
+ to 'Archiving' to match that of GNU tar.
+ Use 'end-of-archive' (EOA) instead of 'end-of-file' (EOF).
+ * main.cc (show_help), tarlz.texi: List operations before options.
+ * Many small improvements have been made to code and documentation.
+
+2022-01-05 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.22 released.
+ * main.cc (getnum): Show option name and valid range if error.
+ (check_lib): Check that LZ_API_VERSION and LZ_version_string match.
+ (main): Report an error if -o is used with any operation except -z.
+ * configure: Set variable LIBS.
+
+2021-06-14 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.21 released.
+ * Lzlib 1.12 or newer is now required.
+ * decode.cc (decode): Skip members without name except when listing.
+ decode_lz.cc (dworker): Likewise. (Reported by Florian Schmaus).
+ * New options '-z, --compress' and '-o, --output'.
+ * New option '--warn-newer'.
+ * tarlz.texi (Invoking tarlz): Document concatenation to stdout.
+ * check.sh: Fix the '--diff' test on OS/2. (Reported by Elbert Pol).
+
+2021-01-08 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.19 released.
+ * extended.cc: Print a diagnostic for each unknown keyword found.
+ * tarlz.h: Add a missing '#include <sys/types.h>' for 'mode_t'.
+
+2020-11-21 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.18 released.
+ * main.cc: New option '--check-lib'.
+ * Implement multi-threaded '-x, --extract'.
+ * Don't #include <sys/sysmacros.h> when compiling on OS/2.
+ * delete.cc, delete_lz.cc: Use Archive_reader.
+ * extract.cc: Rename to decode.cc.
+ * tarlz.texi: New section 'Limitations of multi-threaded extraction'.
+
+2020-07-30 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.17 released.
+ * New option '--mtime'.
+ * New option '-p, --preserve-permissions'.
+ * Implement multi-threaded '-d, --diff'.
+ * list_lz.cc: Rename to decode_lz.cc.
+ * main.cc (main): Report an error if a file name is empty or if the
+ archive is specified more than once.
+ * lzip_index.cc: Improve messages for corruption in last header.
+ * Don't #include <sys/sysmacros.h> when compiling on BSD.
+ * tarlz.texi: New chapter 'Internal structure of tarlz'.
+
+2019-10-08 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.16 released.
+ * extract.cc (extract_member): Fix call order of chown, chmod.
+ * delete_lz.cc (delete_members_lz): Return 2 if collective member.
+ * main.cc: Set a valid invocation_name even if argc == 0.
+ * #include <sys/sysmacros.h> unconditionally.
+ * tarlz.texi: New chapter 'Portable character set'.
+
+2019-04-11 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.15 released.
+ * New option '--delete' (from uncompressed and --no-solid archives).
+ * list_lz.cc: Fix MT listing of archives with format violations.
+
+2019-03-12 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.14 released.
+ * New option '--exclude'.
+ * New option '-h, --dereference'.
+ * Short option name '-h' no longer means '--help'.
+ * create.cc: Implement '-A, --concatenate' and '-r, --append' to
+ uncompressed archives and to standard output.
+ * main.cc: Port option '--out-slots' from plzip.
+
+2019-02-27 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.13 released.
+ * create_lz.cc (cworker): Fix skipping of unreadable files.
+ * list_lz.cc: Fix listing of archives containing empty lzip members.
+ * create.cc (fill_headers): Store negative mtime as zero.
+
+2019-02-22 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.12 released.
+ * create.cc (fill_headers): Fix use of st_rdev instead of st_dev.
+ * Save just numerical uid/gid if user or group not in database.
+ * extract.cc (format_member_name): Print devmajor and devminor.
+ * New option '-d, --diff'.
+ * New option '--ignore-ids'.
+ * extract.cc: Fast '-t, --list' on seekable uncompressed archives.
+
+2019-02-13 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.11 released.
+ * extract.cc (archive_read): Fix endless loop with empty lz file.
+ * Implement multi-threaded '-c, --create' and '-r, --append'.
+ * '--bsolid' is now the default compression granularity.
+ * create.cc (remove_leading_dotslash): Remember more than one prefix.
+ * tarlz.texi: New chapter 'Minimum archive sizes'.
+
+2019-01-31 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.10 released.
+ * New option '--bsolid'.
+ * New option '-B, --data-size'.
+ * create.cc: Set ustar name to zero if extended header is used.
+
+2019-01-22 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.9 released.
+ * Implement multi-threaded '-t, --list'.
+ * New option '-n, --threads'.
+ * Recognize global pax headers. Ignore them for now.
+ * strtoul has been replaced with length-safe parsers.
+ * tarlz.texi: New chapter 'Limitations of parallel tar decoding'.
+
+2018-12-16 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.8 released.
+ * New option '--anonymous' (--owner=root --group=root).
+ * extract.cc (decode): 'tarlz -xf foo ./bar' now extracts 'bar'.
+ * create.cc: Set to zero most fields in extended headers.
+ * tarlz.texi: New chapter 'Amendments to pax format'.
+
+2018-11-23 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.7 released.
+ * New option '--keep-damaged'.
+ * New option '--no-solid'.
+ * create.cc (archive_write): Minimize dictionary size.
+ Detect and skip archive in '-A', '-c', and '-r'.
+ * main.cc (show_version): Show the version of lzlib being used.
+
+2018-10-19 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.6 released.
+ * New option '-A, --concatenate'.
+ * Option '--ignore-crc' replaced with '--missing-crc'.
+ * create.cc (add_member): Check that uid, gid, mtime, devmajor,
+ and devminor are in ustar range.
+ * configure: Accept appending to CXXFLAGS; 'CXXFLAGS+=OPTIONS'.
+ * Makefile.in: Use tarlz in target 'dist'.
+
+2018-09-29 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.5 released.
+ * Implement simplified POSIX pax format.
+ * Implement CRC32-C (Castagnoli) of the extended header data.
+ * New option '--ignore-crc'.
+ * Add missing #includes for major, minor and makedev.
+ * tarlz.texi: Document the new archive format.
+
+2018-04-23 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.4 released.
+ * Add some missing #includes.
+ * main.cc: Open files in binary mode on OS/2.
+
+2018-03-19 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.3 released.
+ * Rename project to 'tarlz' from 'pmtar' (Poor Man's Tar).
+ * New option '-C, --directory'.
+ * Implement lzip compression of members at archive creation.
+ * New option '-r, --append'.
+ * New options '--owner' and '--group'.
+ * New options '--asolid', '--dsolid', and '--solid'.
+ * Implement file appending to compressed archive.
+ * Implement transparent decompression of the archive.
+ * Implement skipping over damaged (un)compressed members.
+ * Implement recursive extraction/listing of directories.
+ * Implement verbose extract/list output.
+ * tarlz.texi: New file.
+
+2014-01-22 Antonio Diaz Diaz <antonio@gnu.org>
+
+ * Version 0.2 released.
+ * configure: Options now accept a separate argument.
+
+2013-02-16 Antonio Diaz Diaz <ant_diaz@teleline.es>
+
+ * Version 0.1 released.
+
+
+Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+This file is a collection of facts, and thus it is not copyrightable,
+but just in case, you have unlimited permission to copy, distribute, and
+modify it.
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..4de87a7
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,80 @@
+Requirements
+------------
+You will need a C++98 compiler with support for 'long long', and the
+compression library lzlib installed. (gcc 3.3.6 or newer is recommended).
+I use gcc 6.1.0 and 3.3.6, but the code should compile with any standards
+compliant compiler.
+Gcc is available at http://gcc.gnu.org.
+Lzlib is available at http://www.nongnu.org/lzip/lzlib.html.
+
+Lzlib must be version 1.12 or newer.
+
+The operating system must allow signal handlers read access to objects with
+static storage duration so that the cleanup handler for Control-C can delete
+the partial output file in '-z, --compress' mode.
+
+
+Procedure
+---------
+1. Unpack the archive if you have not done so already:
+
+ tar -xf tarlz[version].tar.lz
+or
+ lzip -cd tarlz[version].tar.lz | tar -xf -
+
+This creates the directory ./tarlz[version] containing the source code
+extracted from the archive.
+
+2. Change to tarlz directory and run configure.
+ (Try 'configure --help' for usage instructions).
+
+ cd tarlz[version]
+ ./configure
+
+ To link against a lzlib not installed in a standard place, use:
+
+ ./configure CPPFLAGS='-I <includedir>' LDFLAGS='-L <libdir>'
+
+ (Replace <includedir> with the directory containing the file lzlib.h,
+ and <libdir> with the directory containing the file liblz.a).
+
+3. Run make.
+
+ make
+
+4. Optionally, type 'make check' to run the tests that come with tarlz.
+
+5. Type 'make install' to install the program and any data files and
+ documentation. You need root privileges to install into a prefix owned
+ by root.
+
+ Or type 'make install-compress', which additionally compresses the
+ info manual and the man page after installation.
+ (Installing compressed docs may become the default in the future).
+
+ You can install only the program, the info manual, or the man page by
+ typing 'make install-bin', 'make install-info', or 'make install-man'
+ respectively.
+
+
+Another way
+-----------
+You can also compile tarlz into a separate directory.
+To do this, you must use a version of 'make' that supports the variable
+'VPATH', such as GNU 'make'. 'cd' to the directory where you want the
+object files and executables to go and run the 'configure' script.
+'configure' automatically checks for the source code in '.', in '..', and
+in the directory that 'configure' is in.
+
+'configure' recognizes the option '--srcdir=DIR' to control where to look
+for the source code. Usually 'configure' can determine that directory
+automatically.
+
+After running 'configure', you can run 'make' and 'make install' as
+explained above.
+
+
+Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+This file is free documentation: you have unlimited permission to copy,
+distribute, and modify it.
diff --git a/Makefile.in b/Makefile.in
new file mode 100644
index 0000000..76c1fc8
--- /dev/null
+++ b/Makefile.in
@@ -0,0 +1,179 @@
+
+# NOTE: pkgname, pkgversion, progname, VPATH, the *dir variables, CXX, the
+# *FLAGS variables, LIBS and MAKEINFO are not defined in this template;
+# presumably 'configure' supplies them when it produces the Makefile.
+
+# Stem of the distribution directory and tarball used by target 'dist'.
+DISTNAME = $(pkgname)-$(pkgversion)
+# Installation commands; '-m' sets the mode of what is installed and '-d'
+# creates directories.
+INSTALL = install
+INSTALL_PROGRAM = $(INSTALL) -m 755
+INSTALL_DATA = $(INSTALL) -m 644
+INSTALL_DIR = $(INSTALL) -d -m 755
+SHELL = /bin/sh
+# Command that succeeds only if 'install-info --version' can be run.
+CAN_RUN_INSTALLINFO = $(SHELL) -c "install-info --version" > /dev/null 2>&1
+
+# Object files linked into $(progname).
+objs = arg_parser.o lzip_index.o archive_reader.o common.o common_decode.o \
+ common_mutex.o compress.o create.o create_lz.o decode.o decode_lz.o \
+ delete.o delete_lz.o exclude.o extended.o main.o
+
+
+# Targets that do not name files.
+.PHONY : all install install-bin install-info install-man \
+ install-strip install-compress install-strip-compress \
+ install-bin-strip install-info-compress install-man-compress \
+ uninstall uninstall-bin uninstall-info uninstall-man \
+ doc info man check dist clean distclean
+
+# Default target: build the program.
+all : $(progname)
+
+# Link all the object files into the program.
+$(progname) : $(objs)
+ $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(objs) $(LIBS)
+
+# main.o is compiled with PROGVERSION defined as the quoted package version.
+main.o : main.cc
+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) -DPROGVERSION=\"$(pkgversion)\" -c -o $@ $<
+
+# Generic rule to compile any other source file.
+%.o : %.cc
+ $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c -o $@ $<
+
+# prevent 'make' from trying to remake source files
+$(VPATH)/configure $(VPATH)/Makefile.in $(VPATH)/doc/$(pkgname).texi : ;
+%.h %.cc : ;
+
+# Every object is rebuilt when the Makefile changes; the lines below list
+# the headers each object depends on.
+$(objs) : Makefile
+arg_parser.o : arg_parser.h
+archive_reader.o : tarlz.h lzip_index.h archive_reader.h
+common.o : tarlz.h
+common_decode.o : tarlz.h arg_parser.h decode.h
+common_mutex.o : common_mutex.h
+compress.o : tarlz.h arg_parser.h
+create.o : tarlz.h arg_parser.h create.h
+create_lz.o : tarlz.h arg_parser.h common_mutex.h create.h
+decode.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h decode.h
+decode_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h \
+ common_mutex.h decode.h
+delete.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h
+delete_lz.o : tarlz.h arg_parser.h lzip_index.h archive_reader.h
+exclude.o : tarlz.h
+extended.o : tarlz.h
+lzip_index.o : tarlz.h lzip_index.h
+main.o : tarlz.h arg_parser.h
+
+# Build both the info manual and the man page.
+doc : info man
+
+info : $(VPATH)/doc/$(pkgname).info
+
+# The info manual is generated from the texinfo source with $(MAKEINFO).
+$(VPATH)/doc/$(pkgname).info : $(VPATH)/doc/$(pkgname).texi
+ cd $(VPATH)/doc && $(MAKEINFO) $(pkgname).texi
+
+man : $(VPATH)/doc/$(progname).1
+
+# The man page is generated by running help2man on the freshly built program.
+$(VPATH)/doc/$(progname).1 : $(progname)
+ help2man -n 'creates tar archives with multimember lzip compression' \
+ -o $@ ./$(progname)
+
+# Regenerate the Makefile when configure or the template changes.
+Makefile : $(VPATH)/configure $(VPATH)/Makefile.in
+ ./config.status
+
+# Run the test suite, passing it the testsuite directory and the version.
+check : all
+ @$(VPATH)/testsuite/check.sh $(VPATH)/testsuite $(pkgversion)
+
+# Aggregate installation targets. The '-strip' variants install the program
+# with 'install -s'; the '-compress' variants also compress the installed
+# info manual and man page with lzip.
+install : install-bin install-info install-man
+install-strip : install-bin-strip install-info install-man
+install-compress : install-bin install-info-compress install-man-compress
+install-strip-compress : install-bin-strip install-info-compress install-man-compress
+
+# Install the program, creating the destination directory if needed.
+install-bin : all
+ if [ ! -d "$(DESTDIR)$(bindir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(bindir)" ; fi
+ $(INSTALL_PROGRAM) ./$(progname) "$(DESTDIR)$(bindir)/$(progname)"
+
+# Re-run make on install-bin with '-s' appended to INSTALL_PROGRAM.
+install-bin-strip : all
+ $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install-bin
+
+# Install the info manual, removing any previously installed copy first, and
+# register it with install-info when that command is available.
+install-info :
+ if [ ! -d "$(DESTDIR)$(infodir)" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(infodir)" ; fi
+ -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
+ $(INSTALL_DATA) $(VPATH)/doc/$(pkgname).info "$(DESTDIR)$(infodir)/$(pkgname).info"
+ -if $(CAN_RUN_INSTALLINFO) ; then \
+ install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
+ fi
+
+install-info-compress : install-info
+ lzip -v -9 "$(DESTDIR)$(infodir)/$(pkgname).info"
+
+# Install the man page, removing any previously installed copy first.
+install-man :
+ if [ ! -d "$(DESTDIR)$(mandir)/man1" ] ; then $(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1" ; fi
+ -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"*
+ $(INSTALL_DATA) $(VPATH)/doc/$(progname).1 "$(DESTDIR)$(mandir)/man1/$(progname).1"
+
+install-man-compress : install-man
+ lzip -v -9 "$(DESTDIR)$(mandir)/man1/$(progname).1"
+
+# Remove everything installed by the targets above. The leading '-' makes
+# 'make' ignore errors from these commands.
+uninstall : uninstall-man uninstall-info uninstall-bin
+
+uninstall-bin :
+ -rm -f "$(DESTDIR)$(bindir)/$(progname)"
+
+# Unregister the info manual (when install-info is available) before
+# deleting it.
+uninstall-info :
+ -if $(CAN_RUN_INSTALLINFO) ; then \
+ install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$(pkgname).info" ; \
+ fi
+ -rm -f "$(DESTDIR)$(infodir)/$(pkgname).info"*
+
+uninstall-man :
+ -rm -f "$(DESTDIR)$(mandir)/man1/$(progname).1"*
+
+# Create the distribution tarball $(DISTNAME).tar.lz with tarlz.
+# A symbolic link named $(DISTNAME) pointing to $(VPATH) is created first so
+# that every file can be named with the $(DISTNAME)/ prefix; the link is
+# removed once the archive has been written.
+dist : doc
+ ln -sf $(VPATH) $(DISTNAME)
+ tarlz --solid --anonymous -9cvf $(DISTNAME).tar.lz \
+ $(DISTNAME)/AUTHORS \
+ $(DISTNAME)/COPYING \
+ $(DISTNAME)/ChangeLog \
+ $(DISTNAME)/INSTALL \
+ $(DISTNAME)/Makefile.in \
+ $(DISTNAME)/NEWS \
+ $(DISTNAME)/README \
+ $(DISTNAME)/configure \
+ $(DISTNAME)/doc/$(progname).1 \
+ $(DISTNAME)/doc/$(pkgname).info \
+ $(DISTNAME)/doc/$(pkgname).texi \
+ $(DISTNAME)/*.h \
+ $(DISTNAME)/*.cc \
+ $(DISTNAME)/testsuite/check.sh \
+ $(DISTNAME)/testsuite/test.txt \
+ $(DISTNAME)/testsuite/test.txt.tar \
+ $(DISTNAME)/testsuite/test_bad1.txt.tar \
+ $(DISTNAME)/testsuite/test_bad[12].txt \
+ $(DISTNAME)/testsuite/rfoo \
+ $(DISTNAME)/testsuite/rbar \
+ $(DISTNAME)/testsuite/rbaz \
+ $(DISTNAME)/testsuite/test3.tar \
+ $(DISTNAME)/testsuite/test3_nn.tar \
+ $(DISTNAME)/testsuite/test3_eoa[1-4].tar \
+ $(DISTNAME)/testsuite/test3_gh[1-4].tar \
+ $(DISTNAME)/testsuite/test3_bad[1-5].tar \
+ $(DISTNAME)/testsuite/test3_dir.tar \
+ $(DISTNAME)/testsuite/t155.tar \
+ $(DISTNAME)/testsuite/t155_fv[1-3].tar \
+ $(DISTNAME)/testsuite/eoa_blocks.tar \
+ $(DISTNAME)/testsuite/test.txt.lz \
+ $(DISTNAME)/testsuite/test.txt.tar.lz \
+ $(DISTNAME)/testsuite/test_bad[12].txt.tar.lz \
+ $(DISTNAME)/testsuite/test3.tar.lz \
+ $(DISTNAME)/testsuite/test3_eoa[1-5].tar.lz \
+ $(DISTNAME)/testsuite/test3_em[1-6].tar.lz \
+ $(DISTNAME)/testsuite/test3_gh[1-6].tar.lz \
+ $(DISTNAME)/testsuite/test3_nn.tar.lz \
+ $(DISTNAME)/testsuite/test3_sm[1-4].tar.lz \
+ $(DISTNAME)/testsuite/test3_bad[1-6].tar.lz \
+ $(DISTNAME)/testsuite/test3_dir.tar.lz \
+ $(DISTNAME)/testsuite/test3_dot.tar.lz \
+ $(DISTNAME)/testsuite/tar_in_tlz[12].tar.lz \
+ $(DISTNAME)/testsuite/tlz_in_tar[12].tar \
+ $(DISTNAME)/testsuite/ts_in_link.tar.lz \
+ $(DISTNAME)/testsuite/t155.tar.lz \
+ $(DISTNAME)/testsuite/t155_fv[1-6].tar.lz \
+ $(DISTNAME)/testsuite/dotdot[1-5].tar.lz \
+ $(DISTNAME)/testsuite/ug32767.tar.lz \
+ $(DISTNAME)/testsuite/ug32chars.tar.lz \
+ $(DISTNAME)/testsuite/eoa_blocks.tar.lz
+ rm -f $(DISTNAME)
+
+# Remove the program and the object files.
+clean :
+ -rm -f $(progname) $(objs)
+
+# Additionally remove the files produced by configure and any tarballs in
+# the build directory.
+distclean : clean
+ -rm -f Makefile config.status *.tar *.tar.lz
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..108d6ea
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,14 @@
+Changes in version 0.25:
+
+The new option '--ignore-metadata', which makes '-d, --diff' ignore
+differences in file permissions, owner and group IDs, and modification time,
+has been added.
+
+'#include <sys/types.h>' for major, minor, makedev on BSD systems.
+
+File diagnostics of '-z' have been reformatted as 'PROGRAM: FILE: MESSAGE'.
+
+The option '-o, --output' now creates missing intermediate directories when
+compressing to a file.
+
+The variable MAKEINFO has been added to configure and Makefile.in.
diff --git a/README b/README
new file mode 100644
index 0000000..5f1dedb
--- /dev/null
+++ b/README
@@ -0,0 +1,96 @@
+Description
+
+Tarlz is a massively parallel (multi-threaded) combined implementation of
+the tar archiver and the lzip compressor. Tarlz uses the compression library
+lzlib.
+
+Tarlz creates tar archives using a simplified and safer variant of the POSIX
+pax format compressed in lzip format, keeping the alignment between tar
+members and lzip members. The resulting multimember tar.lz archive is
+backward compatible with standard tar tools like GNU tar, which treat it
+like any other tar.lz archive. Tarlz can append files to the end of such
+compressed archives.
+
+Keeping the alignment between tar members and lzip members has two
+advantages. It adds an indexed lzip layer on top of the tar archive, making
+it possible to decode the archive safely in parallel. It also minimizes the
+amount of data lost in case of corruption. Compressing a tar archive with
+plzip may even double the number of files lost for each lzip member damaged
+because it does not keep the members aligned.
+
+Tarlz can create tar archives with five levels of compression granularity:
+per file (--no-solid), per block (--bsolid, default), per directory
+(--dsolid), appendable solid (--asolid), and solid (--solid). It can also
+create uncompressed tar archives.
+
+Of course, compressing each file (or each directory) individually can't
+achieve a compression ratio as high as compressing solidly the whole tar
+archive, but it has the following advantages:
+
+ * The resulting multimember tar.lz archive can be decompressed in
+ parallel, multiplying the decompression speed.
+
+ * New members can be appended to the archive (by removing the
+ end-of-archive member), and unwanted members can be deleted from the
+ archive. Just like an uncompressed tar archive.
+
+ * It is a safe POSIX-style backup format. In case of corruption, tarlz
+ can extract all the undamaged members from the tar.lz archive,
+ skipping over the damaged members, just like the standard
+ (uncompressed) tar. Moreover, the option '--keep-damaged' can be used
+ to recover as much data as possible from each damaged member, and
+ lziprecover can be used to recover some of the damaged members.
+
+ * A multimember tar.lz archive is usually smaller than the corresponding
+ solidly compressed tar.gz archive, except when individually
+ compressing files smaller than about 32 KiB.
+
+Note that the POSIX pax format has a serious flaw. The metadata stored in
+pax extended records are not protected by any kind of check sequence.
+Corruption in a long file name may cause the extraction of the file in the
+wrong place without warning. Corruption in a large file size may cause the
+truncation of the file or the appending of garbage to the file, both
+followed by a spurious warning about a corrupt header far from the place of
+the undetected corruption.
+
+Metadata like file name and file size must be always protected in an archive
+format because of the adverse effects of undetected corruption in them,
+potentially much worse than undetected corruption in the data. Even more so
+in the case of pax because the amount of metadata it stores is potentially
+large, making undetected corruption and archiver misbehavior more probable.
+
+Headers and metadata must be protected separately from data because the
+integrity checking of lzip may not be able to detect the corruption before
+the metadata have been used, for example, to create a new file in the wrong
+place.
+
+Because of the above, tarlz protects the extended records with a Cyclic
+Redundancy Check (CRC) in a way compatible with standard tar tools.
+
+Tarlz does not understand other tar formats like gnu, oldgnu, star, or v7.
+The command 'tarlz -t -f archive.tar.lz > /dev/null' can be used to check
+that the format of the archive is compatible with tarlz.
+
+The diagram below shows the correspondence between each tar member (formed
+by one or two headers plus optional data) in the tar archive and each lzip
+member in the resulting multimember tar.lz archive, when per file
+compression is used:
+
+tar
++========+======+=================+===============+========+======+========+
+| header | data | extended header | extended data | header | data | EOA |
++========+======+=================+===============+========+======+========+
+
+tar.lz
++===============+=================================================+========+
+| member | member | member |
++===============+=================================================+========+
+
+
+Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+This file is free documentation: you have unlimited permission to copy,
+distribute, and modify it.
+
+The file Makefile.in is a data file used by configure to produce the Makefile.
+It has the same copyright owner and permissions that configure itself.
diff --git a/archive_reader.cc b/archive_reader.cc
new file mode 100644
index 0000000..c4438ae
--- /dev/null
+++ b/archive_reader.cc
@@ -0,0 +1,273 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <stdint.h> // for lzlib.h
+#include <unistd.h>
+#include <lzlib.h>
+
+#include "tarlz.h"
+#include "lzip_index.h"
+#include "archive_reader.h"
+
+
+namespace {
+
+const char * const rdaerr_msg = "Error reading archive";
+
+/* Read up to 'size' bytes from 'fd' into 'buf' with pread, starting at
+   file offset 'pos'. Reads interrupted by a signal (EINTR) are retried.
+   Return the number of bytes really read.
+   If (value returned < size) and (errno == 0), means EOF was reached.
+*/
+int preadblock( const int fd, uint8_t * const buf, const int size,
+                const long long pos )
+  {
+  int done = 0;				// bytes stored in buf so far
+  errno = 0;
+  for( ; done < size; errno = 0 )
+    {
+    const int got = pread( fd, buf + done, size - done, pos + done );
+    if( got == 0 ) break;				// EOF
+    if( got < 0 )
+      { if( errno != EINTR ) break; else continue; }	// retry if interrupted
+    done += got;
+    }
+  return done;
+  }
+
+/* Return a descriptor to read the archive from: standard input if
+   'archive_name' is empty, else the result of open_instream.
+   If the descriptor refers to a terminal, a diagnostic is shown using
+   'namep', the descriptor is closed, and -1 is returned.
+*/
+int non_tty_infd( const std::string & archive_name, const char * const namep )
+  {
+  const bool from_stdin = archive_name.empty();
+  const int fd = from_stdin ? STDIN_FILENO : open_instream( archive_name );
+  if( fd < 0 || !isatty( fd ) ) return fd;	// open failed or not a terminal
+
+  // for example /dev/tty
+  const char * const msg = from_stdin ?
+    "I won't read archive data from a terminal (missing -f option?)" :
+    "I won't read archive data from a terminal.";
+  show_file_error( namep, msg );
+  close( fd );
+  return -1;
+  }
+
+
+/* Feed 'size' bytes from 'buffer' to 'decoder'. Calls internal_error if
+   LZ_decompress_write does not accept exactly 'size' bytes.
+*/
+void xLZ_decompress_write( LZ_Decoder * const decoder,
+                           const uint8_t * const buffer, const int size )
+  {
+  const int accepted = LZ_decompress_write( decoder, buffer, size );
+  if( accepted == size ) return;
+  internal_error( "library error (LZ_decompress_write)." );
+  }
+
+} // end namespace
+
+
+/* Open the archive 'archive_name' for reading (standard input if the name
+ is empty) and gather what the readers need to know about it.
+ The initializers depend on each other and rely on running in this order:
+ 'namep' uses 'name', 'infd' uses 'namep', 'lzip_index' uses 'infd', and
+ 'indexed' uses 'seekable' and 'lzip_index'.
+ 'seekable' is true if lseek can reposition 'infd' to offset 0; 'indexed'
+ is true if, in addition, lzip_index.retval() is 0. */
+Archive_descriptor::Archive_descriptor( const std::string & archive_name )
+ : name( archive_name ), namep( name.empty() ? "(stdin)" : name.c_str() ),
+ infd( non_tty_infd( archive_name, namep ) ),
+ lzip_index( infd ),
+ seekable( lseek( infd, 0, SEEK_SET ) == 0 ),
+ indexed( seekable && lzip_index.retval() == 0 ) {}
+
+
+/* Read the extended records announced by 'header' into 'rbuf' and parse
+   them into 'extended'. The size of the records is taken from the size
+   field of 'header'.
+   Return 0 if OK. Else return the nonzero value produced by err() or by
+   read(), or 2 if the records can't be parsed; 'default_msg' becomes the
+   error message when the failing step did not set one.
+*/
+int Archive_reader_base::parse_records( Extended & extended,
+                                        const Tar_header header,
+                                        Resizable_buffer & rbuf,
+                                        const char * const default_msg,
+                                        const bool permissive )
+  {
+  const long long edsize = parse_octal( header + size_o, size_l );
+  const long long bufsize = round_up( edsize );
+  if( edsize <= 0 ) return err( 2, misrec_msg );	// no extended records
+  const bool too_long =
+    ( edsize >= 1LL << 33 ) || ( bufsize > max_edata_size );
+  if( too_long ) return err( -2, longrec_msg );	// records too long
+  if( !rbuf.resize( bufsize ) ) return err( -1, mem_msg );
+
+  e_msg_ = ""; e_code_ = 0;
+  int status = read( rbuf.u8(), bufsize );	// extended records buffer
+  if( status == 0 )
+    { if( !extended.parse( rbuf(), edsize, permissive ) ) status = 2; }
+  if( status != 0 && e_msg_[0] == 0 ) e_msg_ = default_msg;
+  return status;
+  }
+
+
+/* Read 'size' uncompressed bytes, decompressing the input if needed.
+ Return value: 0 = OK, 1 = OOM or read error, 2 = EOF or invalid data.
+ The first call must request exactly header_size bytes. It examines the
+ first block read to decide whether the input is lzip data, a tar header
+ with a valid ustar checksum, or a block of zeros, and opens the decoder
+ if the input is to be treated as compressed. */
+int Archive_reader::read( uint8_t * const buf, const int size )
+ {
+ if( first_read ) // check format
+ {
+ first_read = false;
+ // provisional value; it is cleared below if the input is compressed
+ uncompressed_seekable = ad.seekable && !ad.indexed &&
+ ad.lzip_index.file_size() > 3 * header_size;
+ if( size != header_size )
+ internal_error( "size != header_size on first call." );
+ const int rd = readblock( ad.infd, buf, size );
+ if( rd != size && errno ) return err( -1, rdaerr_msg, errno, rd );
+ // reinterpret the first bytes read as a lzip header
+ const Lzip_header & header = (*(const Lzip_header *)buf);
+ const bool islz = ( rd >= min_member_size && header.check_magic() &&
+ header.check_version() &&
+ isvalid_ds( header.dictionary_size() ) );
+ const bool istar = ( rd == size && check_ustar_chksum( buf ) );
+ const bool iseoa =
+ ( !islz && !istar && rd == size && block_is_zero( buf, size ) );
+ bool maybe_lz = islz; // maybe corrupt tar.lz
+ if( !islz && !istar && !iseoa ) // corrupt or invalid format
+ {
+ // go on as compressed only if has_lz_ext( ad.name ) is true and at
+ // least min_member_size bytes were read; else give up with status 2
+ const bool lz_ext = has_lz_ext( ad.name );
+ show_file_error( ad.namep, lz_ext ? posix_lz_msg : posix_msg );
+ if( lz_ext && rd >= min_member_size ) maybe_lz = true;
+ else return err( 2 );
+ }
+ if( !maybe_lz ) // uncompressed
+ { if( rd == size ) return 0;
+ return err( -2, "EOF reading archive.", 0, rd ); }
+ uncompressed_seekable = false; // compressed
+ decoder = LZ_decompress_open();
+ if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok )
+ { LZ_decompress_close( decoder ); decoder = 0; return err( -1, mem_msg ); }
+ // feed the bytes already read to the decoder, then call read again (now
+ // with first_read false) to get the first decompressed block
+ xLZ_decompress_write( decoder, buf, rd );
+ const int ret = read( buf, size ); if( ret != 0 ) return ret;
+ if( check_ustar_chksum( buf ) || block_is_zero( buf, size ) ) return 0;
+ return err( 2, islz ? posix_lz_msg : "" );
+ }
+
+ if( !decoder ) // uncompressed
+ {
+ const int rd = readblock( ad.infd, buf, size );
+ if( rd == size ) return 0; else return err( -2, end_msg, 0, rd );
+ }
+ // compressed: alternate between draining the decoder into 'buf' and
+ // refilling the decoder from the file until 'size' bytes are produced
+ const int ibuf_size = 16384;
+ uint8_t ibuf[ibuf_size];
+ int sz = 0;
+ while( sz < size )
+ {
+ const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
+ if( rd < 0 )
+ {
+ // decompression error: try to skip to the next lzip member
+ const unsigned long long old_pos = LZ_decompress_total_in_size( decoder );
+ if( LZ_decompress_sync_to_member( decoder ) < 0 )
+ internal_error( "library error (LZ_decompress_sync_to_member)." );
+ e_skip_ = true; set_error_status( 2 );
+ const unsigned long long new_pos = LZ_decompress_total_in_size( decoder );
+ // lzlib < 1.8 does not update total_in_size when syncing to member
+ if( new_pos >= old_pos && new_pos < LLONG_MAX )
+ return err( 2, "", 0, sz, true );
+ return err( -1, "Skipping to next header failed. "
+ "Lzlib 1.8 or newer required.", 0, sz );
+ }
+ if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
+ { return err( -2, end_msg, 0, sz ); }
+ sz += rd;
+ // more output is needed and the decoder accepts input: read more data
+ if( sz < size && !at_eof && LZ_decompress_write_size( decoder ) > 0 )
+ {
+ const int rsize = std::min( ibuf_size, LZ_decompress_write_size( decoder ) );
+ const int rd = readblock( ad.infd, ibuf, rsize );
+ xLZ_decompress_write( decoder, ibuf, rd );
+ if( rd < rsize )
+ {
+ // short read: no more input will be supplied to the decoder
+ at_eof = true; LZ_decompress_finish( decoder );
+ if( errno ) return err( -1, rdaerr_msg, errno, sz );
+ }
+ }
+ }
+ return 0;
+ }
+
+
+/* Skip the data (and padding) of the tar member described by 'extended'.
+   Seeks over the data if 'uncompressed_seekable' is set and lseek
+   succeeds, else reads and discards them.
+   Return 0 if OK, else the nonzero value returned by read().
+*/
+int Archive_reader::skip_member( const Extended & extended )
+  {
+  if( extended.file_size() <= 0 ) return 0;		// nothing to skip
+  long long pending = round_up( extended.file_size() );	// size + padding
+  if( uncompressed_seekable && lseek( ad.infd, pending, SEEK_CUR ) > 0 )
+    return 0;
+  const int bufsize = 32 * header_size;
+  uint8_t buf[bufsize];
+  for( ; pending > 0; )				// skip tar member
+    {
+    const int chunk = ( pending < bufsize ) ? pending : bufsize;
+    const int status = read( buf, chunk );
+    if( status != 0 ) return status;
+    pending -= chunk;
+    }
+  return 0;
+  }
+
+
+/* Reset the decoder and position the reader at the start of member 'i',
+   taking the positions from the lzip index of the archive.
+*/
+void Archive_reader_i::set_member( const long i )
+  {
+  LZ_decompress_reset( decoder );		// prepare for new member
+  member_id = i;
+  archive_pos = ad.lzip_index.mblock( i ).pos();	// where pread continues
+  data_pos_ = ad.lzip_index.dblock( i ).pos();	// decompressed start
+  mdata_end_ = ad.lzip_index.dblock( i ).end();	// decompressed end
+  }
+
+
+/* Read 'size' decompressed bytes from the archive.
+ Return value: 0 = OK, 1 = OOM or read error, 2 = EOF or invalid data.
+ Input is fetched with preadblock at 'archive_pos', so the file offset of
+ ad.infd is not used. 'data_pos_' advances by the number of bytes
+ produced. */
+int Archive_reader_i::read( uint8_t * const buf, const int size )
+ {
+ int sz = 0;
+
+ while( sz < size )
+ {
+ const int rd = LZ_decompress_read( decoder, buf + sz, size - sz );
+ if( rd < 0 )
+ return err( 2, LZ_strerror( LZ_decompress_errno( decoder ) ), 0, sz );
+ if( rd == 0 && LZ_decompress_finished( decoder ) == 1 )
+ return err( -2, end_msg, 0, sz );
+ sz += rd; data_pos_ += rd;
+ // more output is needed and the decoder accepts input: read more data
+ if( sz < size && LZ_decompress_write_size( decoder ) > 0 )
+ {
+ const long long ibuf_size = 16384;
+ uint8_t ibuf[ibuf_size];
+ const long long member_end = ad.lzip_index.mblock( member_id ).end();
+ // bytes left up to the end of the current member or, once archive_pos
+ // has reached that end, up to cdata_size()
+ const long long rest = ( ( archive_pos < member_end ) ?
+ member_end : ad.lzip_index.cdata_size() ) - archive_pos;
+ const int rsize = std::min( LZ_decompress_write_size( decoder ),
+ (int)std::min( ibuf_size, rest ) );
+ if( rsize <= 0 ) LZ_decompress_finish( decoder );
+ else
+ {
+ const int rd = preadblock( ad.infd, ibuf, rsize, archive_pos );
+ xLZ_decompress_write( decoder, ibuf, rd );
+ archive_pos += rd;
+ if( rd < rsize )
+ {
+ // short read: no more input will be supplied to the decoder
+ LZ_decompress_finish( decoder );
+ if( errno ) return err( -1, rdaerr_msg, errno, sz );
+ }
+ }
+ }
+ }
+ return 0;
+ }
+
+
+/* Skip the data (and padding) of the tar member described by 'extended'.
+   If the data end exactly at the end of the current lzip member, just
+   move 'data_pos_' there; else read and discard the data.
+   Return 0 if OK, else the nonzero value returned by read().
+*/
+int Archive_reader_i::skip_member( const Extended & extended )
+  {
+  if( extended.file_size() <= 0 ) return 0;		// nothing to skip
+  long long pending = round_up( extended.file_size() );	// size + padding
+  if( data_pos_ + pending == mdata_end_ )
+    { data_pos_ = mdata_end_; return 0; }
+  const int bufsize = 32 * header_size;
+  uint8_t buf[bufsize];
+  for( ; pending > 0; )				// skip tar member
+    {
+    const int chunk = ( pending < bufsize ) ? pending : bufsize;
+    const int status = read( buf, chunk );
+    if( status != 0 ) return status;
+    pending -= chunk;
+    }
+  return 0;
+  }
diff --git a/archive_reader.h b/archive_reader.h
new file mode 100644
index 0000000..e8963e0
--- /dev/null
+++ b/archive_reader.h
@@ -0,0 +1,120 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* Immutable description of an opened input archive.
+ The members are initialized by the constructor in declaration order, and
+ later members are computed from earlier ones, so the order matters. */
+struct Archive_descriptor
+ {
+ const std::string name; // archive name; empty means standard input
+ const char * const namep; // printable archive name
+ const int infd; // input file descriptor
+ const Lzip_index lzip_index; // built from infd
+ const bool seekable; // lseek to offset 0 succeeded on infd
+ const bool indexed; // archive is a compressed regular file
+
+ Archive_descriptor( const std::string & archive_name );
+ };
+
+
+class Archive_reader_base		// base of serial and indexed readers
+  {
+public:
+  const Archive_descriptor & ad;
+
+protected:
+  LZ_Decoder * decoder;			// destructor closes it if needed
+  const char * e_msg_;			// message for show_file_error
+  int e_code_;				// copy of errno
+  int e_size_;				// partial size read in case of read error
+  bool e_skip_;				// corrupt header skipped
+  bool fatal_;
+
+  /* Store the error details and return the status for the caller.
+     A negative 'retval' marks the error as fatal; its absolute value is
+     returned, and "Fatal error" is used if 'msg' is empty. */
+  int err( const int retval, const char * const msg = "", const int code = 0,
+           const int size = 0, const bool skip = false )
+    {
+    e_msg_ = msg; e_code_ = code; e_size_ = size; e_skip_ = skip;
+    if( retval < 0 )
+      {
+      fatal_ = true;
+      if( e_msg_[0] == 0 ) e_msg_ = "Fatal error";
+      return -retval;
+      }
+    return retval;
+    }
+
+  Archive_reader_base( const Archive_descriptor & d )
+    : ad( d ), decoder( 0 ), e_msg_( "" ), e_code_( 0 ), e_size_( 0 ),
+      e_skip_( false ), fatal_( false ) {}
+
+public:
+  virtual ~Archive_reader_base()
+    {
+    if( decoder ) LZ_decompress_close( decoder );
+    }
+
+  // details of the last error
+  const char * e_msg() const { return e_msg_; }
+  int e_code() const { return e_code_; }
+  int e_size() const { return e_size_; }
+  bool e_skip() const { return e_skip_; }
+  bool fatal() const { return fatal_; }
+
+  /* Read 'size' uncompressed bytes, decompressing the input if needed.
+     Return value: 0 = OK, 1 = OOM or read error, 2 = EOF or invalid data.
+     If !OK, fills all the e_* variables. */
+  virtual int read( uint8_t * const buf, const int size ) = 0;
+
+  int parse_records( Extended & extended, const Tar_header header,
+                     Resizable_buffer & rbuf, const char * const default_msg,
+                     const bool permissive );
+  };
+
+
+/* Reader that obtains the archive data through the 'read' and
+ 'skip_member' functions declared below. */
+class Archive_reader : public Archive_reader_base // serial reader
+ {
+ bool first_read; // true until read is called
+ bool uncompressed_seekable; // value set by first read call
+ bool at_eof; // initially false
+
+public:
+ Archive_reader( const Archive_descriptor & d )
+ : Archive_reader_base( d ), first_read( true ),
+ uncompressed_seekable( false ), at_eof( false ) {}
+
+ int read( uint8_t * const buf, const int size );
+ int skip_member( const Extended & extended );
+ };
+
+
+/* Several indexed readers sharing one Archive_descriptor can be built when
+   the archive is compressed seekable (indexed), for example to decode the
+   archive in parallel.
+*/
+class Archive_reader_i : public Archive_reader_base	// indexed reader
+  {
+  long long data_pos_;		// current decompressed position in archive
+  long long mdata_end_;		// current member decompressed end
+  long long archive_pos;	// current position in archive for pread
+  long member_id;		// current member unless reading beyond
+
+public:
+  /* Open the decoder. If it can't be opened, 'decoder' is left null and
+     the reader is marked as fatal. */
+  Archive_reader_i( const Archive_descriptor & d )
+    : Archive_reader_base( d ),
+      data_pos_( 0 ), mdata_end_( 0 ), archive_pos( 0 ), member_id( 0 )
+    {
+    decoder = LZ_decompress_open();
+    const bool opened = decoder && LZ_decompress_errno( decoder ) == LZ_ok;
+    if( opened ) return;
+    LZ_decompress_close( decoder );
+    decoder = 0;
+    fatal_ = true;
+    }
+
+  long long data_pos() const { return data_pos_; }
+  long long mdata_end() const { return mdata_end_; }
+  bool at_member_end() const { return mdata_end_ == data_pos_; }
+
+  // Resets decoder and sets position to the start of the member.
+  void set_member( const long i );
+
+  int read( uint8_t * const buf, const int size );
+  int skip_member( const Extended & extended );
+  };
diff --git a/arg_parser.cc b/arg_parser.cc
new file mode 100644
index 0000000..0c04d8e
--- /dev/null
+++ b/arg_parser.cc
@@ -0,0 +1,197 @@
+/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version)
+ Copyright (C) 2006-2024 Antonio Diaz Diaz.
+
+ This library is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions, and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#include <cstring>
+#include <string>
+#include <vector>
+
+#include "arg_parser.h"
+
+
+/* Parse the long option 'opt' (a string beginning with "--") and append the
+ resulting record to 'data'. 'arg' is the next command-line argument, or
+ null if there is none. 'argind' is advanced past 'opt', and also past
+ 'arg' if 'arg' is consumed as the option argument.
+ Return true if OK. Else store a message in 'error_' and return false. */
+bool Arg_parser::parse_long_option( const char * const opt, const char * const arg,
+ const Option options[], int & argind )
+ {
+ unsigned len;
+ int index = -1;
+ bool exact = false, ambig = false;
+
+ // len = length of the option name, not counting "--" nor any "=argument"
+ for( len = 0; opt[len+2] && opt[len+2] != '='; ++len ) ;
+
+ // Test all long options for either exact match or abbreviated matches.
+ for( int i = 0; options[i].code != 0; ++i )
+ if( options[i].long_name &&
+ std::strncmp( options[i].long_name, &opt[2], len ) == 0 )
+ {
+ if( std::strlen( options[i].long_name ) == len ) // Exact match found
+ { index = i; exact = true; break; }
+ else if( index < 0 ) index = i; // First nonexact match found
+ // two abbreviated matches are ambiguous only if they differ in code
+ // or in has_arg
+ else if( options[index].code != options[i].code ||
+ options[index].has_arg != options[i].has_arg )
+ ambig = true; // Second or later nonexact match found
+ }
+
+ if( ambig && !exact )
+ {
+ error_ = "option '"; error_ += opt; error_ += "' is ambiguous";
+ return false;
+ }
+
+ if( index < 0 ) // nothing found
+ {
+ error_ = "unrecognized option '"; error_ += opt; error_ += '\'';
+ return false;
+ }
+
+ ++argind;
+ data.push_back( Record( options[index].code, options[index].long_name ) );
+
+ if( opt[len+2] ) // '--<long_option>=<argument>' syntax
+ {
+ if( options[index].has_arg == no )
+ {
+ error_ = "option '--"; error_ += options[index].long_name;
+ error_ += "' doesn't allow an argument";
+ return false;
+ }
+ // an empty argument after '=' is rejected only if the argument is
+ // mandatory
+ if( options[index].has_arg == yes && !opt[len+3] )
+ {
+ error_ = "option '--"; error_ += options[index].long_name;
+ error_ += "' requires an argument";
+ return false;
+ }
+ data.back().argument = &opt[len+3];
+ return true;
+ }
+
+ // no '=' in opt: a mandatory argument is taken from the next argument
+ if( options[index].has_arg == yes )
+ {
+ if( !arg || !arg[0] )
+ {
+ error_ = "option '--"; error_ += options[index].long_name;
+ error_ += "' requires an argument";
+ return false;
+ }
+ ++argind; data.back().argument = arg;
+ return true;
+ }
+
+ return true;
+ }
+
+
+/* Parse the short option(s) in 'opt' (a string beginning with '-', which
+ may group several option letters) and append one record to 'data' per
+ option found. 'arg' is the next command-line argument, or null if there
+ is none. 'argind' is advanced past 'opt', and also past 'arg' if 'arg'
+ is consumed as an option argument.
+ Return true if OK. Else store a message in 'error_' and return false. */
+bool Arg_parser::parse_short_option( const char * const opt, const char * const arg,
+ const Option options[], int & argind )
+ {
+ int cind = 1; // character index in opt
+
+ // cind is set to 0 to end the loop once opt has been fully consumed
+ while( cind > 0 )
+ {
+ int index = -1;
+ const unsigned char c = opt[cind];
+
+ if( c != 0 )
+ for( int i = 0; options[i].code; ++i )
+ if( c == options[i].code )
+ { index = i; break; }
+
+ if( index < 0 )
+ {
+ error_ = "invalid option -- '"; error_ += c; error_ += '\'';
+ return false;
+ }
+
+ data.push_back( Record( c ) );
+ if( opt[++cind] == 0 ) { ++argind; cind = 0; } // opt finished
+
+ // the option accepts an argument and characters remain in opt: the
+ // rest of opt is the argument
+ if( options[index].has_arg != no && cind > 0 && opt[cind] )
+ {
+ data.back().argument = &opt[cind]; ++argind; cind = 0;
+ }
+ // else a mandatory argument is taken from the next argument
+ else if( options[index].has_arg == yes )
+ {
+ if( !arg || !arg[0] )
+ {
+ error_ = "option requires an argument -- '"; error_ += c;
+ error_ += '\'';
+ return false;
+ }
+ data.back().argument = arg; ++argind; cind = 0;
+ }
+ }
+ return true;
+ }
+
+
+/* Parse the arguments argv[1] to argv[argc-1] according to 'options' and
+ store the resulting records in 'data'.
+ If 'in_order' is true, non-option arguments are stored in the position
+ where they appear; else they are stored after all the options.
+ Parsing stops at the first error; in that case 'data' is emptied and
+ 'error_' holds the message. */
+Arg_parser::Arg_parser( const int argc, const char * const argv[],
+ const Option options[], const bool in_order )
+ {
+ if( argc < 2 || !argv || !options ) return;
+
+ std::vector< const char * > non_options; // skipped non-options
+ int argind = 1; // index in argv
+
+ while( argind < argc )
+ {
+ const unsigned char ch1 = argv[argind][0];
+ const unsigned char ch2 = ch1 ? argv[argind][1] : 0;
+
+ if( ch1 == '-' && ch2 ) // we found an option
+ {
+ const char * const opt = argv[argind];
+ const char * const arg = ( argind + 1 < argc ) ? argv[argind+1] : 0;
+ if( ch2 == '-' )
+ {
+ if( !argv[argind][2] ) { ++argind; break; } // we found "--"
+ else if( !parse_long_option( opt, arg, options, argind ) ) break;
+ }
+ else if( !parse_short_option( opt, arg, options, argind ) ) break;
+ }
+ else
+ {
+ // a lone "-" or any argument not beginning with '-' is a non-option
+ if( in_order ) data.push_back( Record( argv[argind++] ) );
+ else non_options.push_back( argv[argind++] );
+ }
+ }
+ if( !error_.empty() ) data.clear();
+ else
+ {
+ // append the skipped non-options, then whatever follows "--"
+ for( unsigned i = 0; i < non_options.size(); ++i )
+ data.push_back( Record( non_options[i] ) );
+ while( argind < argc )
+ data.push_back( Record( argv[argind++] ) );
+ }
+ }
+
+
+/* Parse the single argument 'opt', with 'arg' as its possible option
+   argument. If 'opt' is not an option it is stored as a non-option
+   record. The argument "--" produces no record. In case of error 'data'
+   is emptied and 'error_' holds the message.
+*/
+Arg_parser::Arg_parser( const char * const opt, const char * const arg,
+                        const Option options[] )
+  {
+  if( !opt || !opt[0] || !options ) return;
+
+  const bool is_option = ( opt[0] == '-' && opt[1] != 0 );
+  if( !is_option ) { data.push_back( Record( opt ) ); return; }
+
+  int argind = 1;				// dummy
+  if( opt[1] != '-' ) parse_short_option( opt, arg, options, argind );
+  else if( opt[2] ) parse_long_option( opt, arg, options, argind );
+  if( !error_.empty() ) data.clear();
+  }
diff --git a/arg_parser.h b/arg_parser.h
new file mode 100644
index 0000000..1eeec9a
--- /dev/null
+++ b/arg_parser.h
@@ -0,0 +1,110 @@
+/* Arg_parser - POSIX/GNU command-line argument parser. (C++ version)
+ Copyright (C) 2006-2024 Antonio Diaz Diaz.
+
+ This library is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions, and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+/* Arg_parser reads the arguments in 'argv' and creates a number of
+ option codes, option arguments, and non-option arguments.
+
+ In case of error, 'error' returns a non-empty error message.
+
+ 'options' is an array of 'struct Option' terminated by an element
+ containing a code which is zero. A null long_name means a short-only
+ option. A code value outside the unsigned char range means a long-only
+ option.
+
+ Arg_parser normally makes it appear as if all the option arguments
+ were specified before all the non-option arguments for the purposes
+ of parsing, even if the user of your program intermixed option and
+ non-option arguments. If you want the arguments in the exact order
+ the user typed them, call 'Arg_parser' with 'in_order' = true.
+
+ The argument '--' terminates all options; any following arguments are
+ treated as non-option arguments, even if they begin with a hyphen.
+
+ The syntax for optional option arguments is '-<short_option><argument>'
+ (without whitespace), or '--<long_option>=<argument>'.
+*/
+
+// Parser for POSIX/GNU style command lines. The result of the parsing is
+// a sequence of records that can be read with code(), parsed_name() and
+// argument(). The includer must provide <string> and <vector>.
+class Arg_parser
+ {
+public:
+ // whether an option takes no argument, a mandatory one, or an optional one
+ enum Has_arg { no, yes, maybe };
+
+ struct Option
+ {
+ int code; // Short option letter or code ( code != 0 )
+ const char * long_name; // Long option name (maybe null)
+ Has_arg has_arg;
+ };
+
+private:
+ // One parsed item: an option (code != 0) or a non-option (code == 0).
+ struct Record
+ {
+ int code;
+ std::string parsed_name;
+ std::string argument;
+ // short option: parsed_name is "-" followed by the option letter
+ explicit Record( const unsigned char c )
+ : code( c ), parsed_name( "-" ) { parsed_name += c; }
+ // long option: parsed_name is "--" followed by the long name
+ Record( const int c, const char * const long_name )
+ : code( c ), parsed_name( "--" ) { parsed_name += long_name; }
+ // non-option: code is 0 and the text is kept in 'argument'
+ explicit Record( const char * const arg ) : code( 0 ), argument( arg ) {}
+ };
+
+ const std::string empty_arg; // returned by reference for out-of-range i
+ std::string error_; // empty unless a parsing error happened
+ std::vector< Record > data; // parsed options and non-options
+
+ // Both return false after setting error_; 'argind' is passed by reference.
+ bool parse_long_option( const char * const opt, const char * const arg,
+ const Option options[], int & argind );
+ bool parse_short_option( const char * const opt, const char * const arg,
+ const Option options[], int & argind );
+
+public:
+ Arg_parser( const int argc, const char * const argv[],
+ const Option options[], const bool in_order = false );
+
+ // Restricted constructor. Parses a single token and argument (if any).
+ Arg_parser( const char * const opt, const char * const arg,
+ const Option options[] );
+
+ const std::string & error() const { return error_; }
+
+ // The number of arguments parsed. May be different from argc.
+ int arguments() const { return data.size(); }
+
+ /* If code( i ) is 0, argument( i ) is a non-option.
+ Else argument( i ) is the option's argument (or empty).
+ An out-of-range 'i' also yields 0. */
+ int code( const int i ) const
+ {
+ if( i >= 0 && i < arguments() ) return data[i].code;
+ else return 0;
+ }
+
+ // Full name of the option parsed (short or long).
+ // Empty for an out-of-range 'i'.
+ const std::string & parsed_name( const int i ) const
+ {
+ if( i >= 0 && i < arguments() ) return data[i].parsed_name;
+ else return empty_arg;
+ }
+
+ // Argument of option 'i', or the text of non-option 'i'.
+ // Empty for an out-of-range 'i'.
+ const std::string & argument( const int i ) const
+ {
+ if( i >= 0 && i < arguments() ) return data[i].argument;
+ else return empty_arg;
+ }
+ };
diff --git a/common.cc b/common.cc
new file mode 100644
index 0000000..b653e01
--- /dev/null
+++ b/common.cc
@@ -0,0 +1,72 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cctype>
+#include <cerrno>
+#include <unistd.h>
+
+#include "tarlz.h"
+
+
+/* Return the value of the octal number stored in the first 'size' bytes
+   of 'ptr'. Leading whitespace is skipped and conversion stops at the
+   first byte that is not an octal digit. Return 0 if there are no digits.
+   Overflow is not detected. */
+unsigned long long parse_octal( const uint8_t * const ptr, const int size )
+  {
+  int pos = 0;
+  for( ; pos < size; ++pos )			// skip leading whitespace
+    if( !std::isspace( ptr[pos] ) ) break;
+
+  unsigned long long value = 0;
+  while( pos < size )
+    {
+    const uint8_t ch = ptr[pos++];
+    if( ch < '0' || ch > '7' ) break;		// end of the octal digits
+    value = ( value << 3 ) + ( ch - '0' );
+    }
+  return value;
+  }
+
+
+/* Read up to 'size' bytes from 'fd' into 'buf', retrying when read() is
+   interrupted (EINTR).
+   Return the number of bytes really read.
+   If (value returned < size) and (errno == 0), means EOF was reached.
+*/
+int readblock( const int fd, uint8_t * const buf, const int size )
+  {
+  int total = 0;
+  errno = 0;
+  while( total < size )
+    {
+    const int got = read( fd, buf + total, size - total );
+    if( got == 0 ) break;			// EOF; errno is still 0
+    if( got < 0 )
+      {
+      if( errno != EINTR ) break;		// real error; errno is kept
+      errno = 0;				// interrupted; try again
+      continue;
+      }
+    total += got;
+    errno = 0;
+    }
+  return total;
+  }
+
+
+/* Write 'size' bytes from 'buf' to 'fd', retrying after short writes and
+   when write() is interrupted (EINTR).
+   Return the number of bytes really written.
+   If (value returned < size), it is always an error.
+*/
+int writeblock( const int fd, const uint8_t * const buf, const int size )
+  {
+  int total = 0;
+  errno = 0;
+  while( total < size )
+    {
+    const int put = write( fd, buf + total, size - total );
+    if( put < 0 && errno != EINTR ) break;	// real error; errno is kept
+    if( put > 0 ) total += put;
+    errno = 0;
+    }
+  return total;
+  }
diff --git a/common_decode.cc b/common_decode.cc
new file mode 100644
index 0000000..a0ff89d
--- /dev/null
+++ b/common_decode.cc
@@ -0,0 +1,255 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cerrno>
+#include <cstdio>
+#include <ctime>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "tarlz.h"
+#include "arg_parser.h"
+#include "decode.h"
+
+
+namespace {
+
+enum { mode_string_size = 10,
+ group_string_size = 1 + uname_l + 1 + gname_l + 1 }; // 67
+
+/* Fill the mode_string_size bytes of 'buf' with an "ls -l" style
+   description of 'header': one character for the member type followed by
+   nine permission characters. No terminating null is written. */
+void format_mode_string( const Tar_header header, char buf[mode_string_size] )
+  {
+  switch( (Typeflag)header[typeflag_o] )
+    {
+    case tf_regular:   buf[0] = '-'; break;
+    case tf_link:      buf[0] = 'h'; break;
+    case tf_symlink:   buf[0] = 'l'; break;
+    case tf_chardev:   buf[0] = 'c'; break;
+    case tf_blockdev:  buf[0] = 'b'; break;
+    case tf_directory: buf[0] = 'd'; break;
+    case tf_fifo:      buf[0] = 'p'; break;
+    case tf_hiperf:    buf[0] = 'C'; break;
+    default:           buf[0] = '?';
+    }
+
+  const mode_t mode = parse_octal( header + mode_o, mode_l );	// 12 bits
+  const bool setuid = mode & S_ISUID;
+  const bool setgid = mode & S_ISGID;
+  const bool sticky = mode & S_ISVTX;
+
+  // owner
+  buf[1] = ( mode & S_IRUSR ) ? 'r' : '-';
+  buf[2] = ( mode & S_IWUSR ) ? 'w' : '-';
+  buf[3] = ( mode & S_IXUSR ) ? ( setuid ? 's' : 'x' ) : ( setuid ? 'S' : '-' );
+  // group
+  buf[4] = ( mode & S_IRGRP ) ? 'r' : '-';
+  buf[5] = ( mode & S_IWGRP ) ? 'w' : '-';
+  buf[6] = ( mode & S_IXGRP ) ? ( setgid ? 's' : 'x' ) : ( setgid ? 'S' : '-' );
+  // others
+  buf[7] = ( mode & S_IROTH ) ? 'r' : '-';
+  buf[8] = ( mode & S_IWOTH ) ? 'w' : '-';
+  buf[9] = ( mode & S_IXOTH ) ? ( sticky ? 't' : 'x' ) : ( sticky ? 'T' : '-' );
+  }
+
+
+/* Write " user/group" into 'buf' and return the value returned by snprintf.
+   The names stored in 'header' are used if both are non-empty; else the
+   numeric uid and gid obtained from 'extended' are used. */
+int format_user_group_string( const Extended & extended,
+                              const Tar_header header,
+                              char buf[group_string_size] )
+  {
+  const bool have_names = header[uname_o] && header[gname_o];
+  if( !have_names )
+    return snprintf( buf, group_string_size, " %llu/%llu",
+                     extended.get_uid(), extended.get_gid() );
+  return snprintf( buf, group_string_size,
+                   " %.32s/%.32s", header + uname_o, header + gname_o );
+  }
+
+
+/* Return true if 'dir' is a parent directory of 'name'; that is, if 'dir'
+   is a non-empty prefix of 'name' that either ends with a slash or is
+   followed by a slash in 'name'. */
+bool compare_prefix_dir( const char * const dir, const char * const name )
+  {
+  const char * d = dir;
+  const char * n = name;
+  while( *d && *d == *n ) { ++d; ++n; }
+  if( *d || d == dir ) return false;	// not a prefix, or 'dir' is empty
+  return d[-1] == '/' || *n == '/';
+  }
+
+
+/* Return true if 'name1' and 'name2' are equal once the slashes following
+   their common prefix are ignored (in practice, trailing slashes). */
+bool compare_tslash( const char * const name1, const char * const name2 )
+  {
+  int i = 0;
+  while( name1[i] && name1[i] == name2[i] ) ++i;	// skip the common prefix
+  int j = i;
+  while( name1[i] == '/' ) ++i;
+  while( name2[j] == '/' ) ++j;
+  return name1[i] == 0 && name2[j] == 0;
+  }
+
+} // end namespace
+
+
+// Return true if the first 'size' bytes of 'buf' are all zero.
+// A 'size' of zero (or less) yields true.
+bool block_is_zero( const uint8_t * const buf, const int size )
+  {
+  int i = 0;
+  while( i < size && buf[i] == 0 ) ++i;
+  return i >= size;
+  }
+
+
+/* Format into 'rbuf' a null-terminated line (ending in a newline) that
+ describes the member given by 'extended' and 'header'.
+ If 'long_format' is true the line contains the mode string, user/group,
+ size (or device numbers), modification time, name, and link target.
+ Else it contains just the name.
+ Return false if 'rbuf' can't be enlarged to hold the line. */
+bool format_member_name( const Extended & extended, const Tar_header header,
+ Resizable_buffer & rbuf, const bool long_format )
+ {
+ if( long_format )
+ {
+ // NOTE(review): the two calls below write without checking rbuf.size();
+ // this assumes rbuf always holds at least
+ // mode_string_size + group_string_size bytes -- confirm in tarlz.h.
+ format_mode_string( header, rbuf() );
+ const int group_string_len =
+ format_user_group_string( extended, header, rbuf() + mode_string_size );
+ int offset = mode_string_size + group_string_len;
+ const time_t mtime = extended.mtime().sec();
+ struct tm t;
+ if( !localtime_r( &mtime, &t ) ) // if local time fails
+ { time_t z = 0; if( !gmtime_r( &z, &t ) ) // use UTC, the epoch
+ { t.tm_year = 70; t.tm_mon = t.tm_hour = t.tm_min = 0; t.tm_mday = 1; } }
+ const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
+ const char * const link_string = !islink ? "" :
+ ( ( typeflag == tf_link ) ? " link to " : " -> " );
+ // print "user/group size" in a field of width 19 with 8 or more for size
+ if( typeflag == tf_chardev || typeflag == tf_blockdev )
+ {
+ // devices show "major,minor" instead of the size; the width of major is
+ // what remains of the field after the comma and the digits of minor
+ const unsigned devmajor = parse_octal( header + devmajor_o, devmajor_l );
+ const unsigned devminor = parse_octal( header + devminor_o, devminor_l );
+ const int width = std::max( 1,
+ std::max( 8, 19 - group_string_len ) - 1 - decimal_digits( devminor ) );
+ offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %*u,%u",
+ width, devmajor, devminor );
+ }
+ else
+ {
+ const int width = std::max( 8, 19 - group_string_len );
+ offset += snprintf( rbuf() + offset, rbuf.size() - offset, " %*llu",
+ width, extended.file_size() );
+ }
+ // The first pass may truncate the output; snprintf then returns the
+ // length needed, rbuf is enlarged, and the second pass writes it whole.
+ // NOTE(review): the %02u conversions receive the int fields of struct tm;
+ // harmless for valid dates, but %02d would match the argument type.
+ for( int i = 0; i < 2; ++i ) // resize rbuf if not large enough
+ {
+ const int len = snprintf( rbuf() + offset, rbuf.size() - offset,
+ " %4d-%02u-%02u %02u:%02u %s%s%s\n",
+ 1900 + t.tm_year, 1 + t.tm_mon, t.tm_mday, t.tm_hour,
+ t.tm_min, extended.path().c_str(), link_string,
+ islink ? extended.linkpath().c_str() : "" );
+ if( len + offset < (int)rbuf.size() ) break;
+ if( !rbuf.resize( len + offset + 1 ) ) return false;
+ }
+ }
+ else
+ {
+ // short format: room is needed for the name, the newline, and the null
+ if( rbuf.size() < extended.path().size() + 2 &&
+ !rbuf.resize( extended.path().size() + 2 ) ) return false;
+ snprintf( rbuf(), rbuf.size(), "%s\n", extended.path().c_str() );
+ }
+ return true;
+ }
+
+
+/* Print the member name to stdout if verbosity >= 'vlevel'. The long
+   format is used if verbosity > 'vlevel'.
+   Return false (after showing an error) if the name can't be formatted. */
+bool show_member_name( const Extended & extended, const Tar_header header,
+                       const int vlevel, Resizable_buffer & rbuf )
+  {
+  if( verbosity < vlevel ) return true;		// nothing to show
+
+  const bool long_format = verbosity > vlevel;
+  if( !format_member_name( extended, header, rbuf, long_format ) )
+    { show_error( mem_msg ); return false; }
+  std::fputs( rbuf(), stdout );
+  std::fflush( stdout );
+  return true;
+  }
+
+
+/* Return true if the member 'filename' must be skipped: it is excluded, or
+ file names were given in the command line and 'filename' is neither one
+ of them nor located below one of them.
+ On a match, entry 'i' of 'name_pending' is cleared and, if a '-C' option
+ precedes the matching name and 'chdir_fd' >= 0, the working directory is
+ changed by executing the '-C' options not yet executed up to that name.
+ Throws Chdir_error if a directory change fails. */
+bool check_skip_filename( const Cl_options & cl_opts,
+ std::vector< char > & name_pending,
+ const char * const filename, const int chdir_fd )
+ {
+ // kept between calls so that each '-C' is executed only once while the
+ // matches advance through the command line
+ static int c_idx = -1; // parser index of last -C executed
+ if( Exclude::excluded( filename ) ) return true; // skip excluded files
+ if( cl_opts.num_files <= 0 ) return false; // no files specified, no skip
+ bool skip = true; // else skip all but the files (or trees) specified
+ bool chdir_pending = false;
+
+ for( int i = 0; i < cl_opts.parser.arguments(); ++i )
+ {
+ if( cl_opts.parser.code( i ) == 'C' ) { chdir_pending = true; continue; }
+ if( !nonempty_arg( cl_opts.parser, i ) ) continue; // skip opts, empty names
+ std::string removed_prefix;
+ const char * const name = remove_leading_dotslash(
+ cl_opts.parser.argument( i ).c_str(), &removed_prefix );
+ // match if 'name' is a parent directory of 'filename' or the same name
+ if( compare_prefix_dir( name, filename ) ||
+ compare_tslash( name, filename ) )
+ {
+ print_removed_prefix( removed_prefix );
+ skip = false; name_pending[i] = false;
+ if( chdir_pending && chdir_fd >= 0 )
+ {
+ // the last '-C' executed is after this name: return to the directory
+ // referred to by 'chdir_fd' and execute the '-C' options again
+ if( c_idx > i )
+ { if( fchdir( chdir_fd ) != 0 )
+ { show_error( "Error changing to initial working directory", errno );
+ throw Chdir_error(); } c_idx = -1; }
+ // execute in order the '-C' options between the last one executed
+ // and the matching name
+ for( int j = c_idx + 1; j < i; ++j )
+ {
+ if( cl_opts.parser.code( j ) != 'C' ) continue;
+ const char * const dir = cl_opts.parser.argument( j ).c_str();
+ if( chdir( dir ) != 0 )
+ { show_file_error( dir, chdir_msg, errno ); throw Chdir_error(); }
+ c_idx = j;
+ }
+ }
+ break;
+ }
+ }
+ return skip;
+ }
+
+
+/* Create the directories leading to 'name' (all the components of 'name'
+ except the last one) that do not exist yet.
+ Return false with errno set if a component exists but is not a
+ directory (ENOTDIR), or if mkdir fails for a reason other than EEXIST. */
+bool make_dirs( const std::string & name )
+ {
+ int i = name.size();
+ while( i > 0 && name[i-1] == '/' ) --i; // remove trailing slashes
+ while( i > 0 && name[i-1] != '/' ) --i; // remove last component
+ while( i > 0 && name[i-1] == '/' ) --i; // remove more slashes
+ const int dirsize = i; // first slash before last component
+
+ for( i = 0; i < dirsize; ) // if dirsize == 0, dirname is '/' or empty
+ {
+ // find the next component: name[first..i)
+ while( i < dirsize && name[i] == '/' ) ++i;
+ const int first = i;
+ while( i < dirsize && name[i] != '/' ) ++i;
+ if( first < i )
+ {
+ // 'partial' is the path from the beginning up to this component
+ const std::string partial( name, 0, i );
+ const mode_t mode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
+ struct stat st;
+ // lstat does not follow symbolic links, so a link to a directory is
+ // rejected here just like any other non-directory
+ if( lstat( partial.c_str(), &st ) == 0 )
+ { if( !S_ISDIR( st.st_mode ) ) { errno = ENOTDIR; return false; } }
+ else if( mkdir( partial.c_str(), mode ) != 0 && errno != EEXIST )
+ return false; // if EEXIST, another thread or process created the dir
+ }
+ }
+ return true;
+ }
diff --git a/common_mutex.cc b/common_mutex.cc
new file mode 100644
index 0000000..fb253ed
--- /dev/null
+++ b/common_mutex.cc
@@ -0,0 +1,160 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <pthread.h>
+
+#include "tarlz.h"
+#include "common_mutex.h"
+
+
+namespace {
+
+int error_status = 0;
+
+} // end namespace
+
+
+// Initialize 'mutex' with default attributes.
+// On failure show the error and exit through exit_fail_mt.
+void xinit_mutex( pthread_mutex_t * const mutex )
+  {
+  if( const int rc = pthread_mutex_init( mutex, 0 ) )
+    { show_error( "pthread_mutex_init", rc ); exit_fail_mt(); }
+  }
+
+// Initialize 'cond' with default attributes.
+// On failure show the error and exit through exit_fail_mt.
+void xinit_cond( pthread_cond_t * const cond )
+  {
+  if( const int rc = pthread_cond_init( cond, 0 ) )
+    { show_error( "pthread_cond_init", rc ); exit_fail_mt(); }
+  }
+
+
+// Destroy 'mutex'.
+// On failure show the error and exit through exit_fail_mt.
+void xdestroy_mutex( pthread_mutex_t * const mutex )
+  {
+  if( const int rc = pthread_mutex_destroy( mutex ) )
+    { show_error( "pthread_mutex_destroy", rc ); exit_fail_mt(); }
+  }
+
+// Destroy 'cond'.
+// On failure show the error and exit through exit_fail_mt.
+void xdestroy_cond( pthread_cond_t * const cond )
+  {
+  if( const int rc = pthread_cond_destroy( cond ) )
+    { show_error( "pthread_cond_destroy", rc ); exit_fail_mt(); }
+  }
+
+
+// Lock 'mutex'.
+// On failure show the error and exit through exit_fail_mt.
+void xlock( pthread_mutex_t * const mutex )
+  {
+  if( const int rc = pthread_mutex_lock( mutex ) )
+    { show_error( "pthread_mutex_lock", rc ); exit_fail_mt(); }
+  }
+
+
+// Unlock 'mutex'.
+// On failure show the error and exit through exit_fail_mt.
+void xunlock( pthread_mutex_t * const mutex )
+  {
+  if( const int rc = pthread_mutex_unlock( mutex ) )
+    { show_error( "pthread_mutex_unlock", rc ); exit_fail_mt(); }
+  }
+
+
+// Wait on 'cond' using 'mutex'.
+// On failure show the error and exit through exit_fail_mt.
+void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex )
+  {
+  if( const int rc = pthread_cond_wait( cond, mutex ) )
+    { show_error( "pthread_cond_wait", rc ); exit_fail_mt(); }
+  }
+
+
+// Signal 'cond'.
+// On failure show the error and exit through exit_fail_mt.
+void xsignal( pthread_cond_t * const cond )
+  {
+  if( const int rc = pthread_cond_signal( cond ) )
+    { show_error( "pthread_cond_signal", rc ); exit_fail_mt(); }
+  }
+
+
+// Broadcast 'cond'.
+// On failure show the error and exit through exit_fail_mt.
+void xbroadcast( pthread_cond_t * const cond )
+  {
+  if( const int rc = pthread_cond_broadcast( cond ) )
+    { show_error( "pthread_cond_broadcast", rc ); exit_fail_mt(); }
+  }
+
+
+/* Terminate the process with status 'retval'.
+ This can be called from any thread, main thread or sub-threads alike,
+ since they all call common helper functions that call exit_fail_mt()
+ in case of an error.
+ The callers in this file pass no argument, so a default value for
+ 'retval' is expected in the declaration (not in this file).
+*/
+void exit_fail_mt( const int retval )
+ {
+ // calling 'exit' more than once results in undefined behavior
+ static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+ // The mutex is never unlocked on purpose: the first caller goes on to
+ // 'exit' while any later caller stays blocked here.
+ pthread_mutex_lock( &mutex ); // ignore errors to avoid loop
+ std::exit( retval );
+ }
+
+
+/* Report, only once per distinct 'prefix', that 'prefix' is being removed
+   from the member names.
+   If msgp is null, print the message, else return the message in *msgp.
+   If prefix is already in the list (or is empty, or verbosity < 0), print
+   nothing or return empty *msgp.
+   Return true if a message is printed or returned in *msgp. */
+bool print_removed_prefix( const std::string & prefix,
+                           std::string * const msgp )
+  {
+  // prevent two threads from modifying the list of prefixes at the same time
+  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+  static std::vector< std::string > prefixes;	// list of prefixes
+
+  const bool nothing_to_say = ( verbosity < 0 || prefix.empty() );
+  if( !nothing_to_say )
+    {
+    xlock( &mutex );
+    bool known = false;
+    for( unsigned i = 0; i < prefixes.size() && !known; ++i )
+      known = ( prefixes[i] == prefix );
+    if( !known )
+      {
+      prefixes.push_back( prefix );
+      std::string msg( "Removing leading '" );
+      msg += prefix;
+      msg += "' from member names.";
+      if( msgp ) *msgp = msg; else show_error( msg.c_str() );
+      xunlock( &mutex );	// put here to prevent mixing calls to show_error
+      return true;
+      }
+    xunlock( &mutex );
+    }
+  if( msgp ) msgp->clear();
+  return false;
+  }
+
+
+void set_error_status( const int retval )
+ {
+ // prevent two threads from modifying the error_status at the same time
+ static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+
+ xlock( &mutex );
+ if( error_status < retval ) error_status = retval;
+ xunlock( &mutex );
+ }
+
+
+/* Return the exit status of the program. A non-zero 'retval' is returned
+   unchanged. If 'retval' is 0 but an error status was recorded with
+   set_error_status, return the recorded status, showing a message first
+   if 'show_msg' is true. */
+int final_exit_status( int retval, const bool show_msg )
+  {
+  if( retval != 0 || error_status == 0 ) return retval;
+
+  if( show_msg )
+    show_error( "Exiting with failure status due to previous errors." );
+  return error_status;
+  }
diff --git a/common_mutex.h b/common_mutex.h
new file mode 100644
index 0000000..ed3999c
--- /dev/null
+++ b/common_mutex.h
@@ -0,0 +1,30 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Wrappers of the pthread functions of similar name. Each one shows an
+// error message and exits the program if the pthread function fails.
+// <pthread.h> must be included before this header.
+void xinit_mutex( pthread_mutex_t * const mutex );
+void xinit_cond( pthread_cond_t * const cond );
+void xdestroy_mutex( pthread_mutex_t * const mutex );
+void xdestroy_cond( pthread_cond_t * const cond );
+void xlock( pthread_mutex_t * const mutex );
+void xunlock( pthread_mutex_t * const mutex );
+void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
+void xsignal( pthread_cond_t * const cond );
+void xbroadcast( pthread_cond_t * const cond );
+
+// non-pthread_* declarations are in tarlz.h
+
+// message text shared by the files that include this header
+const char * const conofin_msg = "courier not finished.";
diff --git a/compress.cc b/compress.cc
new file mode 100644
index 0000000..3091889
--- /dev/null
+++ b/compress.cc
@@ -0,0 +1,392 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cerrno>
+#include <csignal>
+#include <cstdio>
+#include <stdint.h> // for lzlib.h
+#include <unistd.h>
+#include <utime.h>
+#include <sys/stat.h>
+#include <lzlib.h>
+
+#include "tarlz.h"
+#include "arg_parser.h"
+
+
+namespace {
+
+/* Variables used in signal handler context.
+ They are not declared volatile because the handler never returns. */
+std::string output_filename;
+int outfd = -1;
+bool delete_output_on_interrupt = false;
+
+
+// Install 'action' as the handler of SIGHUP, SIGINT, and SIGTERM.
+void set_signals( void (*action)(int) )
+  {
+  const int signums[] = { SIGHUP, SIGINT, SIGTERM };
+  for( unsigned i = 0; i < sizeof signums / sizeof signums[0]; ++i )
+    std::signal( signums[i], action );
+  }
+
+
+/* Exit the program with status 'retval'. If an output file created by this
+ process is pending (delete_output_on_interrupt is true), it is first
+ closed and deleted so that no partial output is left behind.
+ This function does not return. */
+void cleanup_and_fail( const int retval )
+ {
+ set_signals( SIG_IGN ); // ignore signals
+ if( delete_output_on_interrupt )
+ {
+ // clear the flag first so that the deletion is not attempted twice
+ delete_output_on_interrupt = false;
+ show_file_error( output_filename.c_str(),
+ "Deleting output file, if it exists." );
+ if( outfd >= 0 ) { close( outfd ); outfd = -1; }
+ // a file that is already missing (ENOENT) is not worth a warning
+ if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
+ show_error( "warning: deletion of output file failed", errno );
+ }
+ std::exit( retval );
+ }
+
+
+/* Handler installed with set_signals: shows a message, removes the pending
+ output file (if any), and exits with status 1. It never returns.
+ NOTE(review): the functions reached from here (std::exit, std::remove,
+ and presumably the stdio calls in show_error) are not async-signal-safe
+ -- confirm that this is an accepted limitation. */
+extern "C" void signal_handler( int )
+ {
+ show_error( "Control-C or similar caught, quitting." );
+ cleanup_and_fail( 1 );
+ }
+
+
+// Return the output file name, or "(stdout)" if the name is empty.
+const char * ne_output_filename()	// non-empty output file name
+  {
+  if( output_filename.empty() ) return "(stdout)";
+  return output_filename.c_str();
+  }
+
+
+bool check_tty_in( const char * const input_filename, const int infd )
+ {
+ if( isatty( infd ) ) // for example /dev/tty
+ { show_file_error( input_filename,
+ "I won't read archive data from a terminal." );
+ close( infd ); return false; }
+ return true;
+ }
+
+bool check_tty_out()
+ {
+ if( isatty( outfd ) ) // for example /dev/tty
+ { show_file_error( ne_output_filename(),
+ "I won't write compressed data to a terminal." );
+ return false; }
+ return true;
+ }
+
+
+/* Close the output file and, if 'in_statsp' is not null, copy to it the
+ owner, permissions, and access/modification times found in *in_statsp.
+ A failure closing the file is fatal (cleanup_and_fail is called). A
+ failure setting the attributes only produces a warning at
+ verbosity >= 1. */
+void close_and_set_permissions( const struct stat * const in_statsp )
+ {
+ bool warning = false;
+ if( in_statsp )
+ {
+ const mode_t mode = in_statsp->st_mode;
+ // fchown in many cases returns with EPERM, which can be safely ignored.
+ if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
+ { if( fchmod( outfd, mode ) != 0 ) warning = true; }
+ else
+ // the owner could not be set: do not carry over the setuid, setgid,
+ // and sticky bits to a file with a different owner
+ if( errno != EPERM ||
+ fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
+ warning = true;
+ }
+ if( close( outfd ) != 0 )
+ { show_file_error( output_filename.c_str(), "Error closing output file",
+ errno ); cleanup_and_fail( 1 ); }
+ outfd = -1;
+ // the output file is complete; it must not be deleted from now on
+ delete_output_on_interrupt = false;
+ if( in_statsp )
+ {
+ // times are set by name once the file is closed
+ struct utimbuf t;
+ t.actime = in_statsp->st_atime;
+ t.modtime = in_statsp->st_mtime;
+ if( utime( output_filename.c_str(), &t ) != 0 ) warning = true;
+ }
+ // NOTE(review): errno here is the one left by the last call made above,
+ // which may not be the call that set 'warning'.
+ if( warning && verbosity >= 1 )
+ show_file_error( output_filename.c_str(),
+ "warning: can't change output file attributes", errno );
+ }
+
+
+/* Compress the 'size' bytes of 'buf' with 'encoder' and write to 'outfd'
+ the compressed data produced so far.
+ A 'size' <= 0 is a request to flush: the encoder is finished, all its
+ pending output is written, and a new member is started. A flush request
+ is ignored if nothing has been written since the previous flush.
+ Return false (after showing an error) if the write to 'outfd' fails.
+ Errors from the compression library are reported with internal_error. */
+bool archive_write( const uint8_t * const buf, const int size,
+ LZ_Encoder * const encoder )
+ {
+ // state shared by all the calls, whatever the encoder passed
+ static bool flushed = true; // avoid flushing empty lzip members
+
+ if( size <= 0 && flushed ) return true;
+ flushed = ( size <= 0 );
+ enum { obuf_size = 65536 };
+ uint8_t obuf[obuf_size];
+ int sz = 0; // bytes of 'buf' accepted by the encoder
+ if( flushed ) LZ_compress_finish( encoder ); // flush encoder
+ while( sz < size || flushed )
+ {
+ if( sz < size )
+ { const int wr = LZ_compress_write( encoder, buf + sz, size - sz );
+ if( wr < 0 ) internal_error( "library error (LZ_compress_write)." );
+ sz += wr; }
+ // when not flushing, stop as soon as all the input has been accepted;
+ // the encoder is read only when it can't accept more input
+ if( sz >= size && !flushed ) break; // minimize dictionary size
+ const int rd = LZ_compress_read( encoder, obuf, obuf_size );
+ if( rd < 0 ) internal_error( "library error (LZ_compress_read)." );
+ // when flushing, the loop ends once the encoder has no more output
+ if( rd == 0 && sz >= size ) break;
+ if( writeblock( outfd, obuf, rd ) != rd )
+ { show_file_error( ne_output_filename(), werr_msg, errno ); return false; }
+ }
+ // after a flush the current member is finished; start a new member so
+ // that the encoder can be used again
+ if( LZ_compress_finished( encoder ) == 1 &&
+ LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
+ internal_error( "library error (LZ_compress_restart_member)." );
+ return true;
+ }
+
+
+/* Compress the end-of-archive (EOA) blocks and any data that follows them.
+   On entry 'header' contains the first EOA block, already read from
+   'infd'; it is then reused as the read buffer until EOF.
+   Unless the archive is being compressed solidly, the encoder is flushed
+   before the EOA blocks and again when non-zero data is found after them.
+   Return false on read error (shown here, using 'filename') or on write
+   error (shown by archive_write).
+   'infd' is never closed here; the caller owns it. */
+bool tail_compress( const Cl_options & cl_opts, const char * const filename,
+                    const int infd, Tar_header header,
+                    LZ_Encoder * const encoder )
+  {
+  if( cl_opts.solidity != solid && !archive_write( 0, 0, encoder ) )
+    return false;		// flush encoder before compressing EOA blocks
+  int size = header_size;
+  bool zero = true;	// true until non-zero data found after EOA blocks
+  while( true )
+    {
+    if( size > 0 && !archive_write( header, size, encoder ) ) return false;
+    if( size < header_size ) break;			// EOF
+    size = readblock( infd, header, header_size );
+    if( errno )			// readblock leaves errno != 0 only on error
+      { show_file_error( filename, "Read error", errno ); return false; }
+    if( zero && !block_is_zero( header, size ) )
+      { zero = false;	// flush encoder after compressing EOA blocks
+        if( cl_opts.solidity != solid && !archive_write( 0, 0, encoder ) )
+          return false; }
+    }
+  return true;
+  }
+
+
+/* Compress the tar archive 'input_filename' ("-" means standard input)
+   with 'encoder'.
+   If neither 'to_stdout' nor 'to_file' is true, the output goes to
+   'input_filename' + ".lz" (or to standard output when reading from
+   standard input). If 'to_file' is true, the output file named by
+   'output_filename' is created once the first block of the input has
+   been checked.
+   The archive is copied member by member; the encoder is flushed at the
+   points chosen by cl_opts.solidity so that lzip members are aligned
+   with tar members.
+   Return 0 on success, 1 on environmental errors (I/O, memory), or 2 if
+   the archive is empty or malformed. 'infd' is closed on every return
+   taken after it has been opened. */
+int compress_archive( const Cl_options & cl_opts,
+                      const std::string & input_filename,
+                      LZ_Encoder * const encoder,
+                      const bool to_stdout, const bool to_file )
+  {
+  const bool one_to_one = !to_stdout && !to_file;
+  const bool from_stdin = input_filename == "-";
+  const char * const filename = from_stdin ? "(stdin)" : input_filename.c_str();
+  const int infd = from_stdin ? STDIN_FILENO : open_instream( filename );
+  // check_tty_in closes infd itself when it fails
+  if( infd < 0 || !check_tty_in( filename, infd ) ) return 1;
+  if( one_to_one )
+    {
+    if( from_stdin ) { outfd = STDOUT_FILENO; output_filename.clear(); }
+    else
+      {
+      output_filename = input_filename + ".lz";
+      outfd = open_outstream( output_filename, true, 0, false );
+      if( outfd < 0 ) { close( infd ); return 1; }
+      delete_output_on_interrupt = true;
+      }
+    if( !check_tty_out() ) { close( infd ); return 1; }	// don't delete a tty
+    }
+  if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
+
+  unsigned long long partial_data_size = 0;	// size of current block
+  Extended extended;			// metadata from extended records
+  Resizable_buffer rbuf;		// headers and extended records buffer
+  if( !rbuf.size() ) { show_error( mem_msg ); close( infd ); return 1; }
+  const char * const rderr_msg = "Read error";
+  bool first_header = true;
+
+  while( true )			// process one tar member per iteration
+    {
+    int total_header_size = header_size;    // e_header + edata + u_header
+    const int rd = readblock( infd, rbuf.u8(), header_size );
+    if( rd == 0 && errno == 0 )			// missing EOA blocks
+      { if( !first_header ) break;
+        show_file_error( filename, "Archive is empty." );
+        close( infd ); return 2; }
+    if( rd != header_size )
+      { show_file_error( filename, rderr_msg, errno ); close( infd ); return 1; }
+    first_header = false;
+
+    const bool is_header = check_ustar_chksum( rbuf.u8() );
+    const bool is_zero = !is_header && block_is_zero( rbuf.u8(), header_size );
+    if( to_file && outfd < 0 && ( is_header || is_zero ) )
+      {
+      // open outfd after checking infd
+      if( !make_dirs( output_filename ) )
+        { show_file_error( output_filename.c_str(), intdir_msg, errno );
+          close( infd ); return 1; }
+      outfd = open_outstream( output_filename, true, 0, false );
+      // check tty only once and don't try to delete a tty
+      if( outfd < 0 || !check_tty_out() ) { close( infd ); return 1; }
+      delete_output_on_interrupt = true;
+      }
+
+    if( !is_header )				// maybe EOA block
+      {
+      if( is_zero )				// first EOA block
+        {
+        // a failure copying the tail must not be reported as success
+        if( !tail_compress( cl_opts, filename, infd, rbuf.u8(), encoder ) )
+          { close( infd ); return 1; }
+        break;
+        }
+      show_file_error( filename, bad_hdr_msg ); close( infd ); return 2;
+      }
+
+    const Typeflag typeflag = (Typeflag)rbuf()[typeflag_o];
+    if( typeflag == tf_extended || typeflag == tf_global )
+      {
+      const long long edsize = parse_octal( rbuf.u8() + size_o, size_l );
+      const long long bufsize = round_up( edsize );
+      // overflow or no extended data
+      if( edsize <= 0 || edsize >= 1LL << 33 || bufsize > max_edata_size )
+        { show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; }
+      if( !rbuf.resize( total_header_size + bufsize ) )
+        { show_file_error( filename, mem_msg ); close( infd ); return 1; }
+      if( readblock( infd, rbuf.u8() + total_header_size, bufsize ) != bufsize )
+        { show_file_error( filename, rderr_msg, errno ); close( infd ); return 1; }
+      total_header_size += bufsize;
+      if( typeflag == tf_extended )	// do not parse global headers
+        {
+        if( !extended.parse( rbuf() + header_size, edsize, false ) )
+          { show_file_error( filename, extrec_msg ); close( infd ); return 2; }
+        // read ustar header
+        if( !rbuf.resize( total_header_size + header_size ) )
+          { show_file_error( filename, mem_msg ); close( infd ); return 1; }
+        if( readblock( infd, rbuf.u8() + total_header_size, header_size ) != header_size )
+          { show_file_error( filename, errno ? rderr_msg : end_msg, errno );
+            close( infd ); return errno ? 1 : 2; }
+        // check the ustar header just read, not the extended header at the
+        // beginning of rbuf (already checked above)
+        if( !check_ustar_chksum( rbuf.u8() + total_header_size ) )
+          { show_file_error( filename, bad_hdr_msg ); close( infd ); return 2; }
+        const Typeflag typeflag2 = (Typeflag)(rbuf() + total_header_size)[typeflag_o];
+        if( typeflag2 == tf_extended || typeflag2 == tf_global )
+          { const char * msg = ( typeflag2 == tf_global ) ? fv_msg2 : fv_msg3;
+            show_file_error( filename, msg ); close( infd ); return 2; }
+        total_header_size += header_size;
+        }
+      }
+
+    // NOTE(review): after a global header no ustar header has been read, so
+    // the pointer passed below is the last block of the global records
+    // rather than a header -- confirm that the size obtained from it is
+    // always 0 in that case.
+    const long long file_size = round_up( extended.get_file_size_and_reset(
+                                rbuf.u8() + total_header_size - header_size ) );
+    if( cl_opts.solidity == bsolid &&
+        block_is_full( total_header_size - header_size, file_size,
+                       cl_opts.data_size, partial_data_size ) &&
+        !archive_write( 0, 0, encoder ) ) { close( infd ); return 1; }
+    if( !archive_write( rbuf.u8(), total_header_size, encoder ) )
+      { close( infd ); return 1; }
+
+    if( file_size )
+      {
+      // copy the file data in chunks of 32 tar blocks
+      const long long bufsize = 32 * header_size;
+      uint8_t buf[bufsize];
+      long long rest = file_size;	// file_size already rounded up
+      while( rest > 0 )
+        {
+        int size = std::min( rest, bufsize );
+        const int rd = readblock( infd, buf, size );
+        rest -= rd;
+        if( rd != size )
+          {
+          show_atpos_error( filename, file_size - rest, true );
+          close( infd ); return 1;
+          }
+        if( !archive_write( buf, size, encoder ) ) { close( infd ); return 1; }
+        }
+      }
+    if( cl_opts.solidity == no_solid && !archive_write( 0, 0, encoder ) )
+      { close( infd ); return 1; }	// one tar member per lzip member
+    }
+  // flush and restart encoder (for next archive)
+  if( !archive_write( 0, 0, encoder ) ) { close( infd ); return 1; }
+  // the output file is closed here only if it was created by this process
+  // for this archive
+  const bool need_close = delete_output_on_interrupt &&
+                          ( one_to_one || ( to_file && !from_stdin ) );
+  struct stat in_stats;
+  const struct stat * const in_statsp =
+    ( need_close && fstat( infd, &in_stats ) == 0 ) ? &in_stats : 0;
+  if( close( infd ) != 0 )
+    { show_file_error( filename, eclosf_msg, errno ); return 1; }
+  if( need_close ) close_and_set_permissions( in_statsp );
+  return 0;
+  }
+
+} // end namespace
+
+
+/* Print to stderr that 'filename' (an archive if 'isarchive', else a file)
+   could not be read at position 'pos'. If errno > 0 the problem is shown
+   as a read error followed by the description of errno; else as an
+   unexpected end. Nothing is printed if verbosity < 0. */
+void show_atpos_error( const char * const filename, const long long pos,
+                       const bool isarchive )
+  {
+  if( verbosity < 0 ) return;
+
+  const int errcode = errno;
+  const bool read_failed = errcode > 0;
+  const char * const kind = isarchive ? "Archive" : "File";
+  const char * const what = read_failed ? "read error" : "ends unexpectedly";
+  const char * const separator = read_failed ? ": " : "";
+  const char * const detail = read_failed ? std::strerror( errcode ) : "";
+  std::fprintf( stderr, "%s: %s: %s %s at pos %llu%s%s\n", program_name,
+                filename, kind, what, pos, separator, detail );
+  }
+
+
+/* Compress the archives named in the command line (or standard input if
+ no names were given) as selected by 'cl_opts'.
+ The output goes to standard output if the output name is "-", to the
+ file named with '-o' if given, or else to one '.lz' file per archive.
+ Return 0 on success, or the error status collected from the archives. */
+int compress( const Cl_options & cl_opts )
+ {
+ if( cl_opts.num_files > 1 && cl_opts.output_filename.size() )
+ { show_file_error( cl_opts.output_filename.c_str(),
+ "Only can compress one archive when using '-o'." ); return 1; }
+ const bool to_stdout = cl_opts.output_filename == "-";
+ if( to_stdout ) // check tty only once
+ { outfd = STDOUT_FILENO; if( !check_tty_out() ) return 1; }
+ else outfd = -1;
+ const bool to_file = !to_stdout && cl_opts.output_filename.size();
+ if( to_file ) output_filename = cl_opts.output_filename;
+ // signals are handled only when an output file may need to be deleted
+ if( !to_stdout && ( cl_opts.filenames_given || to_file ) )
+ set_signals( signal_handler );
+
+ // a single encoder is used for all the archives
+ LZ_Encoder * encoder = LZ_compress_open(
+ option_mapping[cl_opts.level].dictionary_size,
+ option_mapping[cl_opts.level].match_len_limit, LLONG_MAX );
+ if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
+ {
+ if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
+ show_error( mem_msg2 );
+ else
+ internal_error( "invalid argument to encoder." );
+ return 1;
+ }
+
+ // NOTE(review): this early return skips the closing of the encoder and of
+ // 'outfd' done at the end of this function -- confirm it is intended.
+ if( !cl_opts.filenames_given )
+ return compress_archive( cl_opts, "-", encoder, to_stdout, to_file );
+ int retval = 0;
+ bool stdin_used = false;
+ for( int i = 0; i < cl_opts.parser.arguments(); ++i )
+ if( nonempty_arg( cl_opts.parser, i ) ) // skip opts, empty names
+ {
+ // standard input can be compressed only once
+ if( cl_opts.parser.argument( i ) == "-" )
+ { if( stdin_used ) continue; else stdin_used = true; }
+ const int tmp = compress_archive( cl_opts, cl_opts.parser.argument( i ),
+ encoder, to_stdout, to_file );
+ if( tmp )
+ { set_retval( retval, tmp );
+ // an output file is left incomplete: delete it and exit now
+ if( delete_output_on_interrupt ) cleanup_and_fail( retval ); }
+ }
+ // flush and close encoder if needed
+ if( outfd >= 0 && archive_write( 0, 0, encoder ) &&
+ LZ_compress_close( encoder ) < 0 )
+ { show_error( "LZ_compress_close failed." ); set_retval( retval, 1 ); }
+ if( outfd >= 0 && close( outfd ) != 0 ) // to_stdout
+ {
+ show_error( "Error closing stdout", errno );
+ set_retval( retval, 1 );
+ }
+ return retval;
+ }
diff --git a/configure b/configure
new file mode 100755
index 0000000..37dbeac
--- /dev/null
+++ b/configure
@@ -0,0 +1,205 @@
+#! /bin/sh
+# configure script for Tarlz - Archiver with multimember lzip compression
+# Copyright (C) 2013-2024 Antonio Diaz Diaz.
+#
+# This configure script is free software: you have unlimited permission
+# to copy, distribute, and modify it.
+
+pkgname=tarlz
+pkgversion=0.25
+progname=tarlz
+srctrigger=doc/${pkgname}.texi # file whose presence identifies the source directory
+
+# clear some things potentially inherited from environment.
+LC_ALL=C
+export LC_ALL
+srcdir=
+prefix=/usr/local
+exec_prefix='$(prefix)'
+bindir='$(exec_prefix)/bin'
+datarootdir='$(prefix)/share'
+infodir='$(datarootdir)/info'
+mandir='$(datarootdir)/man'
+CXX=g++
+CPPFLAGS=
+CXXFLAGS='-Wall -W -O2'
+LDFLAGS=
+LIBS='-llz -lpthread'
+MAKEINFO=makeinfo
+
+# Check that the default compiler can be run; if not, fall back to c++ and plain -O2.
+/bin/sh -c "${CXX} --version" > /dev/null 2>&1 || { CXX=c++ ; CXXFLAGS=-O2 ; }
+
+# Loop over all args
+args=
+no_create=
+while [ $# != 0 ] ; do
+
+ # Get the first arg, and shuffle
+ option=$1 ; arg2=no # arg2 becomes yes when the option value is the next arg
+ shift
+
+ # Add the argument quoted to args
+ if [ -z "${args}" ] ; then args="\"${option}\""
+ else args="${args} \"${option}\"" ; fi
+
+ # Split out the argument for options that take them
+ # (the text after the first '=', with one trailing '/' removed)
+ case ${option} in
+ *=*) optarg=`echo "${option}" | sed -e 's,^[^=]*=,,;s,/$,,'` ;;
+ esac
+
+ # Process the options
+ case ${option} in
+ --help | -h)
+ echo "Usage: $0 [OPTION]... [VAR=VALUE]..."
+ echo
+ echo "To assign makefile variables (e.g., CXX, CXXFLAGS...), specify them as"
+ echo "arguments to configure in the form VAR=VALUE."
+ echo
+ echo "Options and variables: [defaults in brackets]"
+ echo " -h, --help display this help and exit"
+ echo " -V, --version output version information and exit"
+ echo " --srcdir=DIR find the source code in DIR [. or ..]"
+ echo " --prefix=DIR install into DIR [${prefix}]"
+ echo " --exec-prefix=DIR base directory for arch-dependent files [${exec_prefix}]"
+ echo " --bindir=DIR user executables directory [${bindir}]"
+ echo " --datarootdir=DIR base directory for doc and data [${datarootdir}]"
+ echo " --infodir=DIR info files directory [${infodir}]"
+ echo " --mandir=DIR man pages directory [${mandir}]"
+ echo " CXX=COMPILER C++ compiler to use [${CXX}]"
+ echo " CPPFLAGS=OPTIONS command-line options for the preprocessor [${CPPFLAGS}]"
+ echo " CXXFLAGS=OPTIONS command-line options for the C++ compiler [${CXXFLAGS}]"
+ echo " CXXFLAGS+=OPTIONS append options to the current value of CXXFLAGS"
+ echo " LDFLAGS=OPTIONS command-line options for the linker [${LDFLAGS}]"
+ echo " LIBS=OPTIONS libraries to pass to the linker [${LIBS}]"
+ echo " MAKEINFO=NAME makeinfo program to use [${MAKEINFO}]"
+ echo
+ exit 0 ;;
+ --version | -V)
+ echo "Configure script for ${pkgname} version ${pkgversion}"
+ exit 0 ;;
+ --srcdir) srcdir=$1 ; arg2=yes ;;
+ --prefix) prefix=$1 ; arg2=yes ;;
+ --exec-prefix) exec_prefix=$1 ; arg2=yes ;;
+ --bindir) bindir=$1 ; arg2=yes ;;
+ --datarootdir) datarootdir=$1 ; arg2=yes ;;
+ --infodir) infodir=$1 ; arg2=yes ;;
+ --mandir) mandir=$1 ; arg2=yes ;;
+
+ --srcdir=*) srcdir=${optarg} ;;
+ --prefix=*) prefix=${optarg} ;;
+ --exec-prefix=*) exec_prefix=${optarg} ;;
+ --bindir=*) bindir=${optarg} ;;
+ --datarootdir=*) datarootdir=${optarg} ;;
+ --infodir=*) infodir=${optarg} ;;
+ --mandir=*) mandir=${optarg} ;;
+ --no-create) no_create=yes ;;
+
+ CXX=*) CXX=${optarg} ;;
+ CPPFLAGS=*) CPPFLAGS=${optarg} ;;
+ CXXFLAGS=*) CXXFLAGS=${optarg} ;;
+ CXXFLAGS+=*) CXXFLAGS="${CXXFLAGS} ${optarg}" ;;
+ LDFLAGS=*) LDFLAGS=${optarg} ;;
+ LIBS=*) LIBS="${optarg} ${LIBS}" ;; # prepended to the default libraries, not replacing them
+ MAKEINFO=*) MAKEINFO=${optarg} ;;
+
+ --*)
+ echo "configure: WARNING: unrecognized option: '${option}'" 1>&2 ;;
+ *=* | *-*-*) ;; # silently accept other VAR=VALUE args and args matching *-*-*
+ *)
+ echo "configure: unrecognized option: '${option}'" 1>&2
+ echo "Try 'configure --help' for more information." 1>&2
+ exit 1 ;;
+ esac
+
+ # Check if the option took a separate argument; if so, record it in args
+ # and consume it
+ if [ "${arg2}" = yes ] ; then
+ if [ $# != 0 ] ; then args="${args} \"$1\"" ; shift
+ else echo "configure: Missing argument to '${option}'" 1>&2
+ exit 1
+ fi
+ fi
+done
+
+# Find the source code, if location was not specified.
+# Tries ".", then "..", then the directory part of $0.
+srcdirtext=
+if [ -z "${srcdir}" ] ; then
+ srcdirtext="or . or .." ; srcdir=.
+ if [ ! -r "${srcdir}/${srctrigger}" ] ; then srcdir=.. ; fi
+ if [ ! -r "${srcdir}/${srctrigger}" ] ; then
+ ## the sed command below emulates the dirname command
+ srcdir=`echo "$0" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+ fi
+fi
+
+if [ ! -r "${srcdir}/${srctrigger}" ] ; then
+ echo "configure: Can't find source code in ${srcdir} ${srcdirtext}" 1>&2
+ echo "configure: (At least ${srctrigger} is missing)." 1>&2
+ exit 1
+fi
+
+# Set srcdir to . if that's what it is.
+if [ "`pwd`" = "`cd "${srcdir}" ; pwd`" ] ; then srcdir=. ; fi
+
+echo
+if [ -z "${no_create}" ] ; then # config.status itself passes --no-create
+ echo "creating config.status"
+ rm -f config.status
+ cat > config.status << EOF
+#! /bin/sh
+# This file was generated automatically by configure. Don't edit.
+# Run this file to recreate the current configuration.
+#
+# This script is free software: you have unlimited permission
+# to copy, distribute, and modify it.
+
+exec /bin/sh "$0" ${args} --no-create
+EOF
+ chmod +x config.status
+fi
+
+echo "creating Makefile"
+echo "VPATH = ${srcdir}"
+echo "prefix = ${prefix}"
+echo "exec_prefix = ${exec_prefix}"
+echo "bindir = ${bindir}"
+echo "datarootdir = ${datarootdir}"
+echo "infodir = ${infodir}"
+echo "mandir = ${mandir}"
+echo "CXX = ${CXX}"
+echo "CPPFLAGS = ${CPPFLAGS}"
+echo "CXXFLAGS = ${CXXFLAGS}"
+echo "LDFLAGS = ${LDFLAGS}"
+echo "LIBS = ${LIBS}"
+echo "MAKEINFO = ${MAKEINFO}"
+rm -f Makefile
+cat > Makefile << EOF
+# Makefile for Tarlz - Archiver with multimember lzip compression
+# Copyright (C) 2013-2024 Antonio Diaz Diaz.
+# This file was generated automatically by configure. Don't edit.
+#
+# This Makefile is free software: you have unlimited permission
+# to copy, distribute, and modify it.
+
+pkgname = ${pkgname}
+pkgversion = ${pkgversion}
+progname = ${progname}
+VPATH = ${srcdir}
+prefix = ${prefix}
+exec_prefix = ${exec_prefix}
+bindir = ${bindir}
+datarootdir = ${datarootdir}
+infodir = ${infodir}
+mandir = ${mandir}
+CXX = ${CXX}
+CPPFLAGS = ${CPPFLAGS}
+CXXFLAGS = ${CXXFLAGS}
+LDFLAGS = ${LDFLAGS}
+LIBS = ${LIBS}
+MAKEINFO = ${MAKEINFO}
+EOF
+cat "${srcdir}/Makefile.in" >> Makefile
+
+echo "OK. Now you can run make."
+echo "If make fails, check that the compression library lzlib is correctly installed"
+echo "(see INSTALL)."
diff --git a/create.cc b/create.cc
new file mode 100644
index 0000000..5878dd3
--- /dev/null
+++ b/create.cc
@@ -0,0 +1,740 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <stdint.h> // for lzlib.h
+#include <unistd.h>
+#include <sys/stat.h>
+#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \
+ !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__
+#include <sys/sysmacros.h> // for major, minor
+#else
+#include <sys/types.h> // for major, minor
+#endif
+#include <ftw.h>
+#include <grp.h>
+#include <pwd.h>
+#include <lzlib.h>
+
+#include "tarlz.h"
+#include "arg_parser.h"
+#include "create.h"
+
+
+Archive_attrs archive_attrs; // archive attributes at time of creation
+
+
+namespace {
+
+const Cl_options * gcl_opts = 0; // local vars needed by add_member
+LZ_Encoder * encoder = 0; // null means the archive is written uncompressed
+const char * archive_namep = 0; // archive name shown in diagnostics
+unsigned long long partial_data_size = 0; // size of current block
+Resizable_buffer grbuf; // extended header + data
+int goutfd = -1; // output descriptor used by archive_write; set by encode
+
+
+// Return true if an option '-C' appears in the command line anywhere after
+// the first non-empty file name that does not begin with '/'.
+bool option_C_after_relative_filename( const Arg_parser & parser )
+  {
+  const int count = parser.arguments();
+  int first = 0;			// index of the first relative file name
+  while( first < count &&
+         !( nonempty_arg( parser, first ) &&
+            parser.argument( first )[0] != '/' ) )
+    ++first;
+  for( int j = first + 1; j < count; ++j )
+    if( parser.code( j ) == 'C' ) return true;
+  return false;
+  }
+
+
+/* Check archive type. Return position of EOA blocks or -1 if failure.
+ If remove_eoa, leave fd file pos at beginning of the EOA blocks.
+ Else, leave fd file pos at 0.
+ Return 0 if the file is empty (nothing read and no error).
+ Checks performed: fd is a regular file; the file begins with a valid
+ lzip header whose first decompressed block is either a ustar header
+ with a valid checksum or a block of zeros; the last member has a valid
+ lzip header, and its trailer declares between header_size and 32256
+ bytes of data whose CRC equals the CRC of that many zero bytes.
+*/
+long long check_compressed_appendable( const int fd, const bool remove_eoa )
+ {
+ struct stat st; // fd must be regular
+ if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return -1;
+ if( lseek( fd, 0, SEEK_SET ) != 0 ) return -1;
+ enum { bufsize = header_size + ( header_size / 8 ) };
+ uint8_t buf[bufsize];
+ const int rd = readblock( fd, buf, bufsize );
+ if( rd == 0 && errno == 0 ) return 0; // append to empty archive
+ if( rd < min_member_size || ( rd != bufsize && errno ) ) return -1;
+ const Lzip_header * const p = (const Lzip_header *)buf; // shut up gcc
+ if( !p->check_magic() || !p->check_version() ) return -1;
+ LZ_Decoder * decoder = LZ_decompress_open(); // decompress first header
+ if( !decoder || LZ_decompress_errno( decoder ) != LZ_ok ||
+ LZ_decompress_write( decoder, buf, rd ) != rd ||
+ LZ_decompress_read( decoder, buf, header_size ) != header_size )
+ { LZ_decompress_close( decoder ); return -1; }
+ LZ_decompress_close( decoder );
+ const bool maybe_eoa = block_is_zero( buf, header_size ); // first block is all zeros
+ if( !check_ustar_chksum( buf ) && !maybe_eoa ) return -1;
+ const long long end = lseek( fd, 0, SEEK_END ); // file size
+ if( end < min_member_size ) return -1;
+
+ Lzip_trailer trailer; // read last trailer
+ if( seek_read( fd, trailer.data, Lzip_trailer::size,
+ end - Lzip_trailer::size ) != Lzip_trailer::size ) return -1;
+ const long long member_size = trailer.member_size();
+ if( member_size < min_member_size || member_size > end ||
+ ( maybe_eoa && member_size != end ) ) return -1; // garbage after EOA?
+
+ Lzip_header header; // read last header
+ if( seek_read( fd, header.data, Lzip_header::size,
+ end - member_size ) != Lzip_header::size ) return -1;
+ if( !header.check_magic() || !header.check_version() ||
+ !isvalid_ds( header.dictionary_size() ) ) return -1;
+
+ // EOA marker in last member must contain between 512 and 32256 zeros alone
+ const unsigned long long data_size = trailer.data_size();
+ if( data_size < header_size || data_size > 32256 ) return -1;
+ const unsigned data_crc = trailer.data_crc();
+ const CRC32 crc32;
+ uint32_t crc = 0xFFFFFFFFU;
+ for( unsigned i = 0; i < data_size; ++i ) crc32.update_byte( crc, 0 ); // CRC of data_size zero bytes
+ crc ^= 0xFFFFFFFFU;
+ if( crc != data_crc ) return -1; // last member holds something other than zeros
+
+ const long long pos = remove_eoa ? end - member_size : 0;
+ if( lseek( fd, pos, SEEK_SET ) != pos ) return -1;
+ return end - member_size; // start of the last (EOA) member
+ }
+
+
+/* Skip all tar headers.
+ Return position of EOA blocks, -1 if failure, -2 if out of memory.
+ If remove_eoa, leave fd file pos at beginning of the EOA blocks.
+ Else, leave fd file pos at 0.
+ Return 0 if the file is empty (st_size <= 0).
+ The scan stops at the first block of zeros or at end of file (missing
+ EOA blocks). It fails if a header has a bad checksum and is not a block
+ of zeros, if an extended header is followed by another extended or
+ global header, or if an extended header is the last header found.
+*/
+long long check_uncompressed_appendable( const int fd, const bool remove_eoa )
+ {
+ struct stat st; // fd must be regular
+ if( fstat( fd, &st ) != 0 || !S_ISREG( st.st_mode ) ) return -1;
+ if( lseek( fd, 0, SEEK_SET ) != 0 ) return -1;
+ if( st.st_size <= 0 ) return 0; // append to empty archive
+ long long eoa_pos = 0; // pos of EOA blocks
+ Extended extended; // metadata from extended records
+ Resizable_buffer rbuf; // extended records buffer
+ bool prev_extended = false; // prev header was extended
+ if( !rbuf.size() ) return -2; // presumably the initial allocation failed
+
+ while( true ) // process one tar header per iteration
+ {
+ Tar_header header;
+ const int rd = readblock( fd, header, header_size );
+ if( rd == 0 && errno == 0 ) break; // missing EOA blocks
+ if( rd != header_size ) return -1;
+ if( !check_ustar_chksum( header ) ) // maybe EOA block
+ { if( block_is_zero( header, header_size ) ) break; else return -1; }
+ const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ if( typeflag == tf_extended || typeflag == tf_global )
+ {
+ if( prev_extended ) return -1; // extended header not followed by a ustar header
+ const long long edsize = parse_octal( header + size_o, size_l );
+ const long long bufsize = round_up( edsize );
+ if( edsize <= 0 || edsize >= 1LL << 33 || bufsize > max_edata_size )
+ return -1; // overflow or no extended data
+ if( !rbuf.resize( bufsize ) ) return -2;
+ if( readblock( fd, rbuf.u8(), bufsize ) != bufsize )
+ return -1;
+ if( typeflag == tf_extended )
+ { if( !extended.parse( rbuf(), edsize, false ) ) return -1;
+ prev_extended = true; }
+ continue;
+ }
+ prev_extended = false;
+
+ // skip the member data; the new position is the candidate EOA position
+ eoa_pos = lseek( fd, round_up( extended.get_file_size_and_reset( header ) ),
+ SEEK_CUR );
+ if( eoa_pos <= 0 ) return -1;
+ }
+
+ if( prev_extended ) return -1; // archive ends right after an extended header
+ const long long pos = remove_eoa ? eoa_pos : 0;
+ if( lseek( fd, pos, SEEK_SET ) != pos ) return -1;
+ return eoa_pos;
+ }
+
+/* Write 'size' bytes from 'buf' to the archive (goutfd), passing them
+ through 'encoder' if it is set, or directly if it is null.
+ A call with size <= 0 finishes the current lzip member, writes out the
+ pending compressed data and restarts the encoder for a new member; it
+ does nothing if no data has been written since the previous such call.
+ Return false if writing to the archive fails.
+*/
+bool archive_write( const uint8_t * const buf, const int size )
+ {
+ static bool flushed = true; // avoid flushing empty lzip members
+
+ if( size <= 0 && flushed ) return true;
+ flushed = ( size <= 0 );
+ if( !encoder ) // uncompressed
+ return writeblock_wrapper( goutfd, buf, size );
+ enum { obuf_size = 65536 };
+ uint8_t obuf[obuf_size];
+ int sz = 0; // bytes of buf accepted by the encoder so far
+ if( size <= 0 ) LZ_compress_finish( encoder ); // flush encoder
+ while( sz < size || size <= 0 )
+ {
+ const int wr = LZ_compress_write( encoder, buf + sz, size - sz );
+ if( wr < 0 ) internal_error( "library error (LZ_compress_write)." );
+ sz += wr;
+ if( sz >= size && size > 0 ) break; // minimize dictionary size
+ const int rd = LZ_compress_read( encoder, obuf, obuf_size );
+ if( rd < 0 ) internal_error( "library error (LZ_compress_read)." );
+ if( rd == 0 && sz >= size ) break; // nothing left to read nor to write
+ if( !writeblock_wrapper( goutfd, obuf, rd ) ) return false;
+ }
+ // after a flush, make the encoder ready to start the next member
+ if( LZ_compress_finished( encoder ) == 1 &&
+ LZ_compress_restart_member( encoder, LLONG_MAX ) < 0 )
+ internal_error( "library error (LZ_compress_restart_member)." );
+ return true;
+ }
+
+
+/* Store the name of the member, after removing its leading "./", '/' and
+   ".." prefixes. The name goes in the ustar header if it fits (whole in
+   'name', or split at a '/' between 'prefix' and 'name') and
+   force_extended_name is false; else it goes in an extended record.
+   Return true if the name was stored in the ustar header. */
+bool store_name( const char * const filename, Extended & extended,
+                 Tar_header header, const bool force_extended_name )
+  {
+  const char * const name =
+    remove_leading_dotslash( filename, &extended.removed_prefix, true );
+
+  // extended record requested; leave name zeroed in ustar header
+  if( force_extended_name ) { extended.path( name ); return false; }
+
+  const int namelen = std::strlen( name );
+  if( namelen <= name_l )			// the whole name fits in 'name'
+    { std::memcpy( header + name_o, name, namelen ); return true; }
+
+  if( namelen <= prefix_l + 1 + name_l )	// prefix + '/' + name
+    {
+    int cut = namelen - name_l - 1;		// shortest possible prefix
+    while( cut < namelen && cut <= prefix_l )
+      {
+      if( name[cut] == '/' )			// name can be split here
+        {
+        std::memcpy( header + prefix_o, name, cut );
+        std::memcpy( header + name_o, name + cut + 1, namelen - cut - 1 );
+        return true;
+        }
+      ++cut;
+      }
+    }
+
+  // no valid split found; leave name zeroed in ustar header
+  extended.path( name );
+  return false;
+  }
+
+
+// Close the input file, if one was opened, and return the status (1) that
+// makes nftw stop the tree walk.
+int close_and_fail( const int infd )
+  { if( infd >= 0 ) close( infd ); return 1; }
+
+
+/* Add one tar member to the archive (callback passed to nftw by encode).
+   Return 0 to continue the tree walk; this includes the cases where the
+   file is skipped because it is excluded, its headers can't be filled, or
+   it can't be opened. Return 1 to stop the walk when the extended block is
+   too large or when reading the file or writing the archive fails.
+   The input file is closed on every return path. */
+int add_member( const char * const filename, const struct stat *,
+                const int flag, struct FTW * )
+  {
+  if( Exclude::excluded( filename ) ) return 0;	// skip excluded files
+  long long file_size;
+  Extended extended;			// metadata for extended records
+  Tar_header header;
+  if( !fill_headers( filename, extended, header, file_size, flag ) ) return 0;
+  print_removed_prefix( extended.removed_prefix );
+  // open the file before writing anything, so that an unreadable file
+  // leaves no trace in the archive
+  const int infd = file_size ? open_instream( filename ) : -1;
+  if( file_size && infd < 0 ) { set_error_status( 1 ); return 0; }
+
+  const int ebsize = extended.format_block( grbuf );	// may be 0
+  if( ebsize < 0 )
+    { show_error( extended.full_size_error() ); return close_and_fail( infd ); }
+  // in bsolid mode start a new lzip member when the current one is full
+  if( encoder && gcl_opts->solidity == bsolid &&
+      block_is_full( ebsize, file_size, gcl_opts->data_size,
+                     partial_data_size ) && !archive_write( 0, 0 ) )
+    return close_and_fail( infd );
+  // write extended block to archive
+  if( ebsize > 0 && !archive_write( grbuf.u8(), ebsize ) )
+    return close_and_fail( infd );
+  if( !archive_write( header, header_size ) ) return close_and_fail( infd );
+
+  if( file_size )
+    {
+    const long long bufsize = 32 * header_size;
+    uint8_t buf[bufsize];
+    long long rest = file_size;
+    while( rest > 0 )
+      {
+      int size = std::min( rest, bufsize );
+      const int rd = readblock( infd, buf, size );
+      rest -= rd;
+      if( rd != size )			// read error or file shrank
+        {
+        show_atpos_error( filename, file_size - rest, false );
+        return close_and_fail( infd );
+        }
+      if( rest == 0 )				// last read
+        {
+        // pad the data with zeros up to a multiple of header_size
+        const int rem = file_size % header_size;
+        if( rem > 0 )
+          { const int padding = header_size - rem;
+            std::memset( buf + size, 0, padding ); size += padding; }
+        }
+      if( !archive_write( buf, size ) ) return close_and_fail( infd );
+      }
+    if( close( infd ) != 0 )
+      { show_file_error( filename, eclosf_msg, errno ); return 1; }
+    }
+  // in no_solid mode each member is compressed in its own lzip member
+  if( encoder && gcl_opts->solidity == no_solid && !archive_write( 0, 0 ) )
+    return 1;
+  if( gcl_opts->warn_newer && archive_attrs.is_newer( filename ) )
+    { show_file_error( filename, "File is newer than the archive." );
+      set_error_status( 1 ); }
+  if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
+  return 0;
+  }
+
+
+// Return true if outfd is not a terminal. Else report that archive data
+// won't be written to a terminal (for example /dev/tty) and return false.
+bool check_tty_out( const char * const archive_namep, const int outfd,
+                    const bool to_stdout )
+  {
+  if( !isatty( outfd ) ) return true;
+  const char * const msg = to_stdout ?
+    "I won't write archive data to a terminal (missing -f option?)" :
+    "I won't write archive data to a terminal.";
+  show_file_error( archive_namep, msg );
+  return false;
+  }
+
+} // end namespace
+
+
+/* Copy up to max_size bytes from infd to outfd (up to end of file if
+   max_size < 0). infd and outfd can refer to the same file if copying to
+   a lower file position or if source and destination blocks don't overlap.
+   Return false on read or write error, or if max_size >= 0 and a
+   different number of bytes was copied. */
+bool copy_file( const int infd, const int outfd, const long long max_size )
+  {
+  const long long buffer_size = 65536;
+  const bool limited = ( max_size >= 0 );
+  // remaining number of bytes to copy; never decreases if not limited
+  long long remaining = limited ? max_size : buffer_size;
+  long long total = 0;				// bytes copied so far
+  bool ok = true;
+  uint8_t * const buffer = new uint8_t[buffer_size];
+
+  while( ok && remaining > 0 )
+    {
+    const int want = ( remaining < buffer_size ) ? remaining : buffer_size;
+    if( limited ) remaining -= want;
+    const int got = readblock( infd, buffer, want );
+    if( got != want && errno )
+      { show_error( "Error reading input file", errno ); ok = false; }
+    else
+      {
+      if( got > 0 )
+        {
+        if( writeblock_wrapper( outfd, buffer, got ) ) total += got;
+        else ok = false;
+        }
+      if( ok && got < want ) break;		// EOF
+      }
+    }
+  delete[] buffer;
+  return ok && ( !limited || total == max_size );
+  }
+
+
+// Write 'size' bytes from 'buffer' to outfd. On a short write, report the
+// error against the archive name and return false.
+bool writeblock_wrapper( const int outfd, const uint8_t * const buffer,
+                         const int size )
+  {
+  const bool complete = ( writeblock( outfd, buffer, size ) == size );
+  if( !complete ) show_file_error( archive_namep, werr_msg, errno );
+  return complete;
+  }
+
+
+/* Write the End-Of-Archive records to outfd: two zeroed blocks of
+   header_size bytes if the archive is uncompressed, or a fixed 44-byte
+   lzip member if it is compressed. Return false on write error. */
+bool write_eoa_records( const int outfd, const bool compressed )
+  {
+  if( !compressed )
+    {
+    enum { zeros_size = 2 * header_size };
+    uint8_t zeros[zeros_size];
+    std::memset( zeros, 0, zeros_size );
+    return writeblock_wrapper( outfd, zeros, zeros_size );
+    }
+
+  enum { eoa_member_size = 44 };
+  const uint8_t eoa_member[eoa_member_size] = {
+    0x4C, 0x5A, 0x49, 0x50, 0x01, 0x0C, 0x00, 0x00, 0x6F, 0xFD, 0xFF, 0xFF,
+    0xA3, 0xB7, 0x80, 0x0C, 0x82, 0xDB, 0xFF, 0xFF, 0x9F, 0xF0, 0x00, 0x00,
+    0x2E, 0xAF, 0xB5, 0xEF, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+  return writeblock_wrapper( outfd, eoa_member, eoa_member_size );
+  }
+
+
+/* Return a pointer to filename with any amount of leading "./" and '/'
+   strings skipped. If dotdot is true, first skip everything up to and
+   including the last ".." component found.
+   The skipped prefix is stored in *removed_prefixp (cleared if nothing
+   was skipped). If a non-empty filename is skipped whole, return ".".
+*/
+const char * remove_leading_dotslash( const char * const filename,
+                                      std::string * const removed_prefixp,
+                                      const bool dotdot )
+  {
+  const char * start = filename;
+
+  if( dotdot )
+    {
+    int i = 0;
+    while( filename[i] )
+      {
+      if( dotdot_at_i( filename, i ) ) start = filename + i + 2;
+      ++i;
+      }
+    }
+  for( ; ; )				// skip leading '/' and "./"
+    {
+    const bool slash = ( *start == '/' );
+    const bool dotslash = ( *start == '.' && start[1] == '/' );
+    if( !slash && !dotslash ) break;
+    ++start;
+    }
+  if( start == filename ) removed_prefixp->clear();	// no prefix was removed
+  else removed_prefixp->assign( filename, start - filename );
+  if( *start == 0 && *filename != 0 ) start = ".";
+  return start;
+  }
+
+
+/* Fill the ustar header, and the extended records when needed, of the
+ member 'filename' from the data returned by hstat.
+ set file_size != 0 only for regular files
+ Return false, after showing an error message, if the file can't be
+ stat'ed, is the archive itself, is a directory that can't be opened
+ (flag == FTW_DNR), is a symbolic link that can't be read, is a device
+ with devmajor or devminor out of range, or is of unknown type.
+*/
+bool fill_headers( const char * const filename, Extended & extended,
+ Tar_header header, long long & file_size, const int flag )
+ {
+ struct stat st;
+ if( hstat( filename, &st, gcl_opts->dereference ) != 0 )
+ { show_file_error( filename, cant_stat, errno );
+ set_error_status( 1 ); return false; }
+ if( archive_attrs.is_the_archive( st ) )
+ { show_file_error( archive_namep, "Archive can't contain itself; not dumped." );
+ return false; }
+ init_tar_header( header );
+ bool force_extended_name = false; // set when any extended record is used
+
+ const mode_t mode = st.st_mode;
+ print_octal( header + mode_o, mode_l - 1,
+ mode & ( S_ISUID | S_ISGID | S_ISVTX |
+ S_IRWXU | S_IRWXG | S_IRWXO ) );
+ // uid/gid given in the command line override those of the file
+ const long long uid = ( gcl_opts->uid >= 0 ) ? gcl_opts->uid : st.st_uid;
+ const long long gid = ( gcl_opts->gid >= 0 ) ? gcl_opts->gid : st.st_gid;
+ if( uid_in_ustar_range( uid ) ) print_octal( header + uid_o, uid_l - 1, uid );
+ else if( extended.set_uid( uid ) ) force_extended_name = true;
+ if( uid_in_ustar_range( gid ) ) print_octal( header + gid_o, gid_l - 1, gid );
+ else if( extended.set_gid( gid ) ) force_extended_name = true;
+ const long long mtime = gcl_opts->mtime_set ? gcl_opts->mtime : st.st_mtime;
+ if( time_in_ustar_range( mtime ) )
+ print_octal( header + mtime_o, mtime_l - 1, mtime );
+ else { extended.set_atime( gcl_opts->mtime_set ? mtime : st.st_atime );
+ extended.set_mtime( mtime ); force_extended_name = true; }
+ Typeflag typeflag;
+ if( S_ISREG( mode ) ) typeflag = tf_regular;
+ else if( S_ISDIR( mode ) )
+ {
+ typeflag = tf_directory;
+ if( flag == FTW_DNR )
+ { show_file_error( filename, "Can't open directory", errno );
+ set_error_status( 1 ); return false; }
+ }
+ else if( S_ISLNK( mode ) )
+ {
+ typeflag = tf_symlink;
+ long len, sz; // len: link length without trailing '/'; sz: readlink result
+ if( st.st_size <= linkname_l ) // read the link directly into the header
+ {
+ len = sz = readlink( filename, (char *)header + linkname_o, linkname_l );
+ while( len > 1 && header[linkname_o+len-1] == '/' ) // trailing '/'
+ { --len; header[linkname_o+len] = 0; }
+ }
+ else // link may need an extended record
+ {
+ char * const buf = new char[st.st_size+1];
+ len = sz = readlink( filename, buf, st.st_size );
+ if( sz == st.st_size )
+ {
+ while( len > 1 && buf[len-1] == '/' ) --len; // trailing '/'
+ if( len <= linkname_l ) std::memcpy( header + linkname_o, buf, len );
+ else { buf[len] = 0; extended.linkpath( buf );
+ force_extended_name = true; }
+ }
+ delete[] buf;
+ }
+ if( sz != st.st_size ) // readlink failed or disagrees with st_size
+ {
+ if( sz < 0 )
+ show_file_error( filename, "Error reading symbolic link", errno );
+ else
+ show_file_error( filename, "Wrong size reading symbolic link.\n"
+ "Please, send a bug report to the maintainers of your filesystem, "
+ "mentioning\n'wrong st_size of symbolic link'.\nSee "
+ "http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_stat.h.html" );
+ set_error_status( 1 ); return false;
+ }
+ }
+ else if( S_ISCHR( mode ) || S_ISBLK( mode ) )
+ {
+ typeflag = S_ISCHR( mode ) ? tf_chardev : tf_blockdev;
+ if( (unsigned)major( st.st_rdev ) >= 2 << 20 ||
+ (unsigned)minor( st.st_rdev ) >= 2 << 20 )
+ { show_file_error( filename, "devmajor or devminor is larger than 2_097_151." );
+ set_error_status( 1 ); return false; }
+ print_octal( header + devmajor_o, devmajor_l - 1, major( st.st_rdev ) );
+ print_octal( header + devminor_o, devminor_l - 1, minor( st.st_rdev ) );
+ }
+ else if( S_ISFIFO( mode ) ) typeflag = tf_fifo;
+ else { show_file_error( filename, "Unknown file type." );
+ set_error_status( 2 ); return false; }
+ header[typeflag_o] = typeflag;
+
+ if( uid == (long long)( (uid_t)uid ) ) // get name if uid is in range
+ { const struct passwd * const pw = getpwuid( uid );
+ if( pw && pw->pw_name )
+ std::strncpy( (char *)header + uname_o, pw->pw_name, uname_l - 1 ); }
+
+ if( gid == (long long)( (gid_t)gid ) ) // get name if gid is in range
+ { const struct group * const gr = getgrgid( gid );
+ if( gr && gr->gr_name )
+ std::strncpy( (char *)header + gname_o, gr->gr_name, gname_l - 1 ); }
+
+ // NOTE(review): a regular file larger than max_file_size gets file_size 0
+ file_size = ( typeflag == tf_regular && st.st_size > 0 &&
+ st.st_size <= max_file_size ) ? st.st_size : 0;
+ if( file_size >= 1LL << 33 ) // size goes in an extended record
+ { extended.file_size( file_size ); force_extended_name = true; }
+ else print_octal( header + size_o, size_l - 1, file_size );
+ store_name( filename, extended, header, force_extended_name );
+ print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) ); // must be last
+ return true;
+ }
+
+
+/* Decide whether the member described by extended_size and file_size
+   must start a new block instead of being added to the current one.
+   If it returns true, partial_data_size is reset to the size of the
+   member; else the size of the member is added to partial_data_size. */
+bool block_is_full( const int extended_size,
+                    const unsigned long long file_size,
+                    const unsigned long long target_size,
+                    unsigned long long & partial_data_size )
+  {
+  const unsigned long long member_size =	// may overflow 'long long'
+    extended_size + header_size + round_up( file_size );
+  const bool target_reached = ( partial_data_size >= target_size );
+  const bool would_overshoot =
+    ( partial_data_size >= min_data_size &&
+      partial_data_size + member_size / 2 > target_size );
+  const bool full = ( target_reached || would_overshoot );
+  if( full ) partial_data_size = member_size;
+  else partial_data_size += member_size;
+  return full;
+  }
+
+
+// Return the sum of all the bytes of the header, counting each byte of
+// the chksum field as a space (0x20).
+unsigned ustar_chksum( const Tar_header header )
+  {
+  unsigned sum = chksum_l * 0x20;		// treat chksum field as spaces
+  for( int i = 0; i < header_size; ++i )
+    if( i < chksum_o || i >= chksum_o + chksum_l ) sum += header[i];
+  return sum;
+  }
+
+
+// Return true if the header passes check_ustar_magic and the value stored
+// in its chksum field matches the checksum computed from its contents.
+bool check_ustar_chksum( const Tar_header header )
+  {
+  if( !check_ustar_magic( header ) ) return false;
+  return ustar_chksum( header ) == parse_octal( header + chksum_o, chksum_l );
+  }
+
+
+// Return true if name ends with ".lz" or ".tlz" and has at least one
+// character before the extension.
+bool has_lz_ext( const std::string & name )
+  {
+  const std::string::size_type len = name.size();
+  if( len > 3 && name.compare( len - 3, 3, ".lz" ) == 0 ) return true;
+  return len > 4 && name.compare( len - 4, 4, ".tlz" ) == 0;
+  }
+
+
+int Cl_options::compressed() const // tri-state bool with error (-2)
+ {
+ const int lz_ext = archive_name.empty() ? -1 : has_lz_ext( archive_name );
+ if( !level_set ) return lz_ext; // no level set in command line
+ const bool cl_compressed = !uncompressed();
+ if( lz_ext < 0 || lz_ext == cl_compressed ) return cl_compressed;
+ show_file_error( archive_name.c_str(), lz_ext ?
+ "Uncompressed archive can't have .lz or .tlz extension." :
+ "Compressed archive requires .lz or .tlz extension." );
+ return -2;
+ }
+
+
+/* Append the archives named in the command line to the end of the
+   archive, or copy them to standard output if no archive name was given.
+   The EOA blocks of the destination archive and of each copied archive
+   are left out, and new EOA records are written after the last copy.
+   Return 0 if all went well, 1 if an environmental error occurred, or 2
+   if an archive does not look appendable.
+   Every file descriptor opened here is closed on every return path. */
+int concatenate( const Cl_options & cl_opts )
+  {
+  if( cl_opts.num_files <= 0 )
+    { if( verbosity >= 1 ) show_error( "Nothing to concatenate." ); return 0; }
+  int compressed = cl_opts.compressed();		// tri-state bool
+  if( compressed == -2 ) return 1;
+  const bool to_stdout = cl_opts.archive_name.empty();
+  archive_namep = to_stdout ? "(stdout)" : cl_opts.archive_name.c_str();
+  const int outfd =
+    to_stdout ? STDOUT_FILENO : open_outstream( cl_opts.archive_name, false );
+  if( outfd < 0 ) return 1;
+  if( !check_tty_out( archive_namep, outfd, to_stdout ) )
+    { close( outfd ); return 1; }
+  if( !to_stdout && !archive_attrs.init( outfd ) )
+    { show_file_error( archive_namep, "Can't stat", errno );
+      close( outfd ); return 1; }
+  if( !to_stdout && compressed >= 0 )	// level or ext are set in cl
+    {
+    // position outfd at the EOA blocks of the destination archive
+    const long long pos = compressed ?
+                          check_compressed_appendable( outfd, true ) :
+                          check_uncompressed_appendable( outfd, true );
+    if( pos == -2 ) { show_error( mem_msg ); close( outfd ); return 1; }
+    if( pos < 0 )
+      { show_file_error( archive_namep, compressed ?
+                         "This does not look like an appendable tar.lz archive." :
+                         "This does not look like an appendable tar archive." );
+        close( outfd ); return 2; }
+    }
+
+  int retval = 0;
+  bool eoa_pending = false;		// data copied; EOA records are needed
+  for( int i = 0; i < cl_opts.parser.arguments(); ++i )	// copy archives
+    {
+    if( !nonempty_arg( cl_opts.parser, i ) ) continue;	// skip opts, empty names
+    const char * const filename = cl_opts.parser.argument( i ).c_str();
+    if( Exclude::excluded( filename ) ) continue;	// skip excluded files
+    const int infd = open_instream( filename );
+    if( infd < 0 ) { retval = 1; break; }
+    struct stat st;
+    if( !to_stdout && fstat( infd, &st ) == 0 &&
+        archive_attrs.is_the_archive( st ) )
+      { show_file_error( filename, "Archive can't contain itself; "
+                         "not concatenated." ); close( infd ); continue; }
+    long long size;			// size of the archive without EOA blocks
+    if( compressed < 0 )		// not initialized yet
+      {
+      // the first archive copied decides the type of all of them
+      if( ( size = check_compressed_appendable( infd, false ) ) > 0 )
+        compressed = true;
+      else if( ( size = check_uncompressed_appendable( infd, false ) ) > 0 )
+        compressed = false;
+      else if( size != -2 ) { size = -1; compressed = has_lz_ext( filename ); }
+      }
+    else size = compressed ? check_compressed_appendable( infd, false ) :
+                             check_uncompressed_appendable( infd, false );
+    if( size == -2 )
+      { show_error( mem_msg ); close( infd ); retval = 1; break; }
+    if( size < 0 )
+      { show_file_error( filename, compressed ?
+                         "Not an appendable tar.lz archive." :
+                         "Not an appendable tar archive." );
+        close( infd ); retval = 2; break; }
+    const bool copied = copy_file( infd, outfd, size );
+    const int copy_errno = errno;	// close may change errno
+    const bool closed = ( close( infd ) == 0 );	// close even if copy failed
+    if( !copied || !closed )
+      { show_file_error( filename, "Error copying archive",
+                         copied ? errno : copy_errno );
+        eoa_pending = false; retval = 1; break; }
+    eoa_pending = true;
+    if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
+    }
+
+  if( eoa_pending && !write_eoa_records( outfd, compressed ) && retval == 0 )
+    retval = 1;
+  if( close( outfd ) != 0 && retval == 0 )
+    { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; }
+  return retval;
+  }
+
+/* Create an archive, or append to one if program_mode is m_append, from
+ the files named in the command line. The archive is written to standard
+ output if no archive name was given.
+ A compressed archive is created by the multi-threaded encode_lz if
+ solidity is neither asolid nor solid, num_workers > 0, and no option -C
+ appears after a relative file name; else the files are added one by one
+ by add_member through nftw.
+ Return 1 or 2 on early failure, else the value of final_exit_status.
+*/
+int encode( const Cl_options & cl_opts )
+ {
+ if( !grbuf.size() ) { show_error( mem_msg ); return 1; }
+ int compressed = cl_opts.compressed(); // tri-state bool
+ if( compressed == -2 ) return 1;
+ const bool to_stdout = cl_opts.archive_name.empty();
+ archive_namep = to_stdout ? "(stdout)" : cl_opts.archive_name.c_str();
+ gcl_opts = &cl_opts;
+
+ const bool append = cl_opts.program_mode == m_append;
+ if( cl_opts.num_files <= 0 )
+ {
+ if( !append && !to_stdout ) // create archive
+ { show_error( "Cowardly refusing to create an empty archive.", 0, true );
+ return 1; }
+ else // create/append to stdout or append to archive
+ { if( verbosity >= 1 ) show_error( "Nothing to append." ); return 0; }
+ }
+
+ if( to_stdout ) // create/append to stdout
+ goutfd = STDOUT_FILENO;
+ else // create/append to archive
+ if( ( goutfd = open_outstream( cl_opts.archive_name, !append ) ) < 0 )
+ return 1;
+ if( !check_tty_out( archive_namep, goutfd, to_stdout ) )
+ { close( goutfd ); return 1; }
+ if( append && !to_stdout ) // position goutfd at the EOA blocks
+ {
+ long long pos;
+ if( compressed < 0 ) // not initialized yet
+ {
+ if( ( pos = check_compressed_appendable( goutfd, true ) ) > 0 )
+ compressed = true;
+ else if( ( pos = check_uncompressed_appendable( goutfd, true ) ) > 0 )
+ compressed = false;
+ else if( pos != -2 ) { pos = -1; compressed = false; } // unknown
+ }
+ else pos = compressed ? check_compressed_appendable( goutfd, true ) :
+ check_uncompressed_appendable( goutfd, true );
+ if( pos == -2 ) { show_error( mem_msg ); close( goutfd ); return 1; }
+ if( pos < 0 )
+ { show_file_error( archive_namep, compressed ?
+ "This does not look like an appendable tar.lz archive." :
+ "This does not look like an appendable tar archive." );
+ close( goutfd ); return 2; }
+ }
+
+ if( !archive_attrs.init( goutfd ) )
+ { show_file_error( archive_namep, "Can't stat", errno );
+ close( goutfd ); return 1; }
+
+ if( compressed ) // note: -1 (unknown) also takes this branch
+ {
+ /* CWD is not per-thread; multi-threaded --create can't be used if a
+ -C option appears after a relative filename in the command line. */
+ if( cl_opts.solidity != asolid && cl_opts.solidity != solid &&
+ cl_opts.num_workers > 0 &&
+ !option_C_after_relative_filename( cl_opts.parser ) )
+ {
+ // show_file_error( archive_namep, "Multi-threaded --create" );
+ return encode_lz( cl_opts, archive_namep, goutfd );
+ }
+ encoder = LZ_compress_open( option_mapping[cl_opts.level].dictionary_size,
+ option_mapping[cl_opts.level].match_len_limit, LLONG_MAX );
+ if( !encoder || LZ_compress_errno( encoder ) != LZ_ok )
+ {
+ if( !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
+ show_error( mem_msg2 );
+ else
+ internal_error( "invalid argument to encoder." );
+ close( goutfd ); return 1;
+ }
+ }
+
+ int retval = 0;
+ for( int i = 0; i < cl_opts.parser.arguments(); ++i ) // parse command line
+ {
+ const int code = cl_opts.parser.code( i );
+ const std::string & arg = cl_opts.parser.argument( i );
+ const char * filename = arg.c_str();
+ if( code == 'C' && chdir( filename ) != 0 )
+ { show_file_error( filename, chdir_msg, errno ); retval = 1; break; }
+ if( code ) continue; // skip options
+ if( cl_opts.parser.argument( i ).empty() ) continue; // skip empty names
+ std::string deslashed; // arg without trailing slashes
+ unsigned len = arg.size();
+ while( len > 1 && arg[len-1] == '/' ) --len;
+ if( len < arg.size() )
+ { deslashed.assign( arg, 0, len ); filename = deslashed.c_str(); }
+ if( Exclude::excluded( filename ) ) continue; // skip excluded files
+ struct stat st;
+ if( lstat( filename, &st ) != 0 ) // filename from command line
+ { show_file_error( filename, cant_stat, errno ); set_error_status( 1 ); }
+ else if( ( retval = nftw( filename, add_member, 16,
+ cl_opts.dereference ? 0 : FTW_PHYS ) ) != 0 )
+ break; // write error
+ else if( encoder && cl_opts.solidity == dsolid && !archive_write( 0, 0 ) )
+ { retval = 1; break; } // dsolid: one lzip member per command-line argument
+ }
+
+ if( retval == 0 ) // write End-Of-Archive records
+ {
+ enum { bufsize = 2 * header_size };
+ uint8_t buf[bufsize];
+ std::memset( buf, 0, bufsize );
+ if( encoder &&
+ ( cl_opts.solidity == asolid ||
+ ( cl_opts.solidity == bsolid && partial_data_size ) ) &&
+ !archive_write( 0, 0 ) ) retval = 1; // flush encoder
+ else if( !archive_write( buf, bufsize ) ||
+ ( encoder && !archive_write( 0, 0 ) ) ) retval = 1;
+ }
+ if( encoder && LZ_compress_close( encoder ) < 0 )
+ { show_error( "LZ_compress_close failed." ); retval = 1; }
+ if( close( goutfd ) != 0 && retval == 0 )
+ { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; }
+ return final_exit_status( retval );
+ }
diff --git a/create.h b/create.h
new file mode 100644
index 0000000..d5ef7bc
--- /dev/null
+++ b/create.h
@@ -0,0 +1,47 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* Snapshot of the attributes of the archive, taken with fstat when init
+   is called. Until init succeeds, every query returns false. */
+class Archive_attrs
+  {
+  struct stat ast;		// archive attributes at time of init
+  bool initialized;		// true once init has succeeded
+  bool isreg;			// true if init found a regular file
+
+public:
+  Archive_attrs() : initialized( false ), isreg( false ) {}
+
+  // Record the attributes of the open archive 'fd'.
+  // Return false (leaving the flags untouched) if fstat fails.
+  bool init( const int fd )
+    {
+    const bool ok = ( fstat( fd, &ast ) == 0 );
+    if( ok )
+      {
+      if( S_ISREG( ast.st_mode ) ) isreg = true;
+      initialized = true;
+      }
+    return ok;
+    }
+
+  // true if 'st' has the device and inode of the (regular file) archive
+  bool is_the_archive( const struct stat & st ) const
+    {
+    if( !isreg ) return false;
+    return st.st_dev == ast.st_dev && st.st_ino == ast.st_ino;
+    }
+
+  // true if the modification time in 'st' is later than the archive's
+  bool is_newer( const struct stat & st ) const
+    { return initialized ? ( st.st_mtime > ast.st_mtime ) : false; }
+
+  // Same test for a file name (symbolic links are not followed).
+  // A file that can't be examined with lstat is reported as newer.
+  bool is_newer( const char * const filename ) const
+    {
+    if( !initialized ) return false;
+    struct stat fst;
+    if( lstat( filename, &fst ) != 0 ) return true;
+    return fst.st_mtime > ast.st_mtime;
+    }
+  };
+
+extern Archive_attrs archive_attrs; // shared instance; only declared here, the definition is not in this header
+
+const char * const cant_stat = "Can't stat input file"; // message shown when lstat fails on a file name from the command line
diff --git a/create_lz.cc b/create_lz.cc
new file mode 100644
index 0000000..5436bf5
--- /dev/null
+++ b/create_lz.cc
@@ -0,0 +1,594 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <queue>
+#include <pthread.h>
+#include <stdint.h> // for lzlib.h
+#include <unistd.h>
+#include <sys/stat.h>
+#include <ftw.h>
+#include <lzlib.h>
+
+#include "tarlz.h"
+#include "arg_parser.h"
+#include "common_mutex.h"
+#include "create.h"
+
+
+namespace {
+
+const Cl_options * gcl_opts = 0; // local vars needed by add_member_lz (set by encode_lz; also read by cworker)
+enum { max_packet_size = 1 << 20 }; // size in bytes of each output buffer filled by loop_encode
+class Packet_courier; // forward declaration; the class is defined below
+Packet_courier * courierp = 0; // set by encode_lz; lets the nftw callback add_member_lz reach the courier
+unsigned long long partial_data_size = 0; // size of current block (passed to block_is_full; reset by grouper)
+
+
+/* Tally of a fixed number of slots shared between threads.
+   get_slot takes one slot, waiting while none is free; leave_slot returns
+   one and wakes a waiter if the tally had been exhausted. */
+class Slot_tally
+  {
+  const int num_slots;			// total slots
+  int num_free;				// remaining free slots
+  pthread_mutex_t mutex;		// held while num_free is changed
+  pthread_cond_t slot_av;		// slot available
+
+  Slot_tally( const Slot_tally & );	// not implemented: no copies
+  void operator=( const Slot_tally & );	// not implemented: no assignment
+
+public:
+  explicit Slot_tally( const int slots )
+    : num_slots( slots ), num_free( slots )
+    {
+    xinit_mutex( &mutex );
+    xinit_cond( &slot_av );
+    }
+
+  ~Slot_tally()
+    {
+    xdestroy_cond( &slot_av );
+    xdestroy_mutex( &mutex );
+    }
+
+  // true if every slot is free (num_free is read without taking the mutex)
+  bool all_free() { return num_slots == num_free; }
+
+  void get_slot()			// wait for a free slot
+    {
+    xlock( &mutex );
+    while( num_free < 1 ) xwait( &slot_av, &mutex );
+    num_free -= 1;
+    xunlock( &mutex );
+    }
+
+  void leave_slot()			// return a slot to the tally
+    {
+    xlock( &mutex );
+    const bool was_exhausted = ( num_free == 0 );
+    ++num_free;
+    if( was_exhausted ) xsignal( &slot_av );
+    xunlock( &mutex );
+    }
+  };
+
+
+/* Input packet: name and size of one file plus pointers to its extended
+   records and ustar header. A default-constructed Ipacket (empty
+   filename) is the token that marks the end of an lzip member. */
+struct Ipacket
+  {
+  const long long file_size;
+  const std::string filename;	// filename.empty() means end of lzip member
+  const Extended * const extended;
+  const uint8_t * const header;
+
+  // end of lzip member token
+  Ipacket()
+    : file_size( 0 ), filename(), extended( 0 ), header( 0 ) {}
+
+  // packet describing one file
+  Ipacket( const char * const fname, const long long fsize,
+           const Extended * const ext_p, const uint8_t * const header_p )
+    : file_size( fsize ), filename( fname ),
+      extended( ext_p ), header( header_p ) {}
+  };
+
+/* Output packet: a buffer of compressed data to be written to the archive.
+   A default-constructed Opacket (data == 0) is the token that marks the
+   end of an lzip member. */
+struct Opacket
+  {
+  const uint8_t * const data;	// data == 0 means end of lzip member
+  const int size;		// number of bytes in data (if any)
+
+  Opacket() : data( 0 ), size( 0 ) {}
+
+  Opacket( uint8_t * const buffer, const int nbytes )
+    : data( buffer ), size( nbytes ) {}
+  };
+
+
+class Packet_courier /* Moves packets around: ipackets from the grouper to
+ the workers, and opackets from the workers to the muxer. It keeps one
+ input queue and one output queue per worker. Input-side state is changed
+ under imutex and output-side state under omutex. */
+ {
+public:
+ unsigned icheck_counter; // number of calls to distribute_packet
+ unsigned iwait_counter; // times a worker had to wait for an ipacket
+ unsigned ocheck_counter; // number of calls to deliver_packet
+ unsigned owait_counter; // times deliver_packet had to wait for an opacket
+private:
+ int receive_worker_id; // worker queue currently receiving packets
+ int deliver_worker_id; // worker queue currently delivering packets
+ Slot_tally slot_tally; // limits the number of input packets
+ std::vector< std::queue< const Ipacket * > > ipacket_queues; // one input queue per worker
+ std::vector< std::queue< const Opacket * > > opacket_queues; // one output queue per worker
+ int num_working; // number of workers still running
+ const int num_workers; // number of workers
+ const unsigned out_slots; // max output packets per queue
+ pthread_mutex_t imutex; // held while ipacket_queues, receive_worker_id or eof are accessed
+ pthread_cond_t iav_or_eof; // input packet available or grouper done
+ pthread_mutex_t omutex; // held while opacket_queues, deliver_worker_id or num_working are accessed
+ pthread_cond_t oav_or_exit; // output packet available or all workers exited
+ std::vector< pthread_cond_t > slot_av; // output slot available
+ bool eof; // grouper done
+
+ Packet_courier( const Packet_courier & ); // declared as private
+ void operator=( const Packet_courier & ); // declared as private
+
+public:
+ Packet_courier( const int workers, const int in_slots, const int oslots )
+ : icheck_counter( 0 ), iwait_counter( 0 ),
+ ocheck_counter( 0 ), owait_counter( 0 ),
+ receive_worker_id( 0 ), deliver_worker_id( 0 ),
+ slot_tally( in_slots ), ipacket_queues( workers ),
+ opacket_queues( workers ), num_working( workers ),
+ num_workers( workers ), out_slots( oslots ), slot_av( workers ),
+ eof( false )
+ {
+ xinit_mutex( &imutex ); xinit_cond( &iav_or_eof );
+ xinit_mutex( &omutex ); xinit_cond( &oav_or_exit );
+ for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] );
+ }
+
+ ~Packet_courier()
+ {
+ for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] );
+ xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex );
+ xdestroy_cond( &iav_or_eof ); xdestroy_mutex( &imutex );
+ }
+
+ /* Receive an ipacket from grouper.
+ If filename.empty() (end of lzip member token), move to next queue.
+ Only packets with a file name take a slot from slot_tally; the wait for
+ the slot happens before imutex is locked. */
+ void receive_packet( const Ipacket * const ipacket )
+ {
+ if( !ipacket->filename.empty() )
+ slot_tally.get_slot(); // wait for a free slot
+ xlock( &imutex );
+ ipacket_queues[receive_worker_id].push( ipacket );
+ if( ipacket->filename.empty() && ++receive_worker_id >= num_workers )
+ receive_worker_id = 0; // wrap around to the first worker
+ xbroadcast( &iav_or_eof ); // all workers wait on the same condition
+ xunlock( &imutex );
+ }
+
+ /* Distribute an ipacket to a worker.
+ Wait until the queue of 'worker_id' has a packet or the grouper is done.
+ Return the packet, or 0 if the queue is empty and the grouper is done; in
+ that case the worker is counted as exited. */
+ const Ipacket * distribute_packet( const int worker_id )
+ {
+ const Ipacket * ipacket = 0;
+ xlock( &imutex );
+ ++icheck_counter;
+ while( ipacket_queues[worker_id].empty() && !eof )
+ {
+ ++iwait_counter;
+ xwait( &iav_or_eof, &imutex );
+ }
+ if( !ipacket_queues[worker_id].empty() )
+ {
+ ipacket = ipacket_queues[worker_id].front();
+ ipacket_queues[worker_id].pop();
+ }
+ xunlock( &imutex );
+ if( ipacket )
+ { if( !ipacket->filename.empty() ) slot_tally.leave_slot(); } // tokens never took a slot
+ else
+ {
+ // notify muxer when last worker exits
+ xlock( &omutex );
+ if( --num_working == 0 ) xsignal( &oav_or_exit );
+ xunlock( &omutex );
+ }
+ return ipacket;
+ }
+
+ /* Collect an opacket from a worker.
+ A data packet waits while the queue of 'worker_id' already holds
+ out_slots packets; an end of member token (data == 0) is queued at once. */
+ void collect_packet( const Opacket * const opacket, const int worker_id )
+ {
+ xlock( &omutex );
+ if( opacket->data )
+ {
+ while( opacket_queues[worker_id].size() >= out_slots )
+ xwait( &slot_av[worker_id], &omutex );
+ }
+ opacket_queues[worker_id].push( opacket );
+ if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit ); // muxer only reads this queue now
+ xunlock( &omutex );
+ }
+
+ /* Deliver an opacket to muxer.
+ If opacket data == 0, move to next queue and wait again.
+ Return 0 when the current queue is empty and no worker is running. */
+ const Opacket * deliver_packet()
+ {
+ const Opacket * opacket = 0;
+ xlock( &omutex );
+ ++ocheck_counter;
+ while( true )
+ {
+ while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
+ {
+ ++owait_counter;
+ xwait( &oav_or_exit, &omutex );
+ }
+ if( opacket_queues[deliver_worker_id].empty() ) break; // all workers exited
+ opacket = opacket_queues[deliver_worker_id].front();
+ opacket_queues[deliver_worker_id].pop();
+ if( opacket_queues[deliver_worker_id].size() + 1 == out_slots ) // queue was full before the pop
+ xsignal( &slot_av[deliver_worker_id] );
+ if( opacket->data ) break; // data packet: hand it to the muxer
+ if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0;
+ delete opacket; opacket = 0; // the token is consumed here
+ }
+ xunlock( &omutex );
+ return opacket;
+ }
+
+ void finish() // grouper has no more packets to send
+ {
+ xlock( &imutex );
+ eof = true;
+ xbroadcast( &iav_or_eof ); // wake every worker waiting for input
+ xunlock( &imutex );
+ }
+
+ bool finished() // all packets delivered to muxer (checked without locking)
+ {
+ if( !slot_tally.all_free() || !eof || num_working != 0 ) return false;
+ for( int i = 0; i < num_workers; ++i )
+ if( !ipacket_queues[i].empty() ) return false;
+ for( int i = 0; i < num_workers; ++i )
+ if( !opacket_queues[i].empty() ) return false;
+ return true;
+ }
+ };
+
+
+/* Send one ipacket with tar member metadata to courier.
+   Callback for nftw: builds the extended records and the ustar header of
+   'filename' and passes them to the courier in one ipacket, together with
+   the end-of-group tokens required by the solidity level.
+   Return 0 to continue the tree walk (also when the file is excluded or
+   fill_headers returns false), 1 to stop it on error.
+   The header and extended records belong to this function until they are
+   handed to the courier, so every earlier return path frees them.
+*/
+int add_member_lz( const char * const filename, const struct stat *,
+                   const int flag, struct FTW * )
+  {
+  if( Exclude::excluded( filename ) ) return 0;	// skip excluded files
+  long long file_size;
+  // metadata for extended records
+  Extended * const extended = new( std::nothrow ) Extended;
+  uint8_t * const header = extended ? new( std::nothrow ) Tar_header : 0;
+  if( !header )
+    { show_error( mem_msg ); delete extended; return 1; }
+  if( !fill_headers( filename, *extended, header, file_size, flag ) )
+    { delete[] header; delete extended; return 0; }
+  print_removed_prefix( extended->removed_prefix );
+
+  if( gcl_opts->solidity == bsolid )
+    {
+    const int ebsize = extended->full_size();
+    if( ebsize < 0 )			// full_size reported an error
+      {
+      show_error( extended->full_size_error() );
+      delete[] header; delete extended;	// not handed to the courier yet
+      return 1;
+      }
+    if( block_is_full( ebsize, file_size, gcl_opts->data_size,
+                       partial_data_size ) )
+      courierp->receive_packet( new Ipacket );		// end of group
+    }
+  // from here on the worker that gets the packet frees header and extended
+  courierp->receive_packet( new Ipacket( filename, file_size, extended, header ) );
+
+  if( gcl_opts->solidity == no_solid )	// one tar member per group
+    courierp->receive_packet( new Ipacket );
+  if( verbosity >= 1 ) std::fprintf( stderr, "%s\n", filename );
+  return 0;
+  }
+
+
+struct Grouper_arg // arguments passed to the grouper thread through its void pointer
+ {
+ const Cl_options * cl_opts; // options and parsed command line walked by grouper
+ Packet_courier * courier; // courier that receives the ipackets
+ };
+
+
+/* Package metadata of the files to be archived and pass them to the
+   courier for distribution to workers.
+   Thread entry point; 'arg' points to a Grouper_arg. The parsed command
+   line is walked in order: each -C option changes the working directory
+   and each non-empty, non-excluded file name is traversed with nftw,
+   which calls add_member_lz for every file found. A name that can't be
+   examined with lstat sets the error status and is skipped; any other
+   failure terminates the program through exit_fail_mt.
+   Always returns 0.
+*/
+extern "C" void * grouper( void * arg )
+  {
+  const Grouper_arg & tmp = *(const Grouper_arg *)arg;
+  const Cl_options & cl_opts = *tmp.cl_opts;
+  Packet_courier & courier = *tmp.courier;
+
+  for( int i = 0; i < cl_opts.parser.arguments(); ++i )	// parse command line
+    {
+    const int code = cl_opts.parser.code( i );
+    // named 'name' so that it does not hide the thread argument 'arg'
+    const std::string & name = cl_opts.parser.argument( i );
+    const char * filename = name.c_str();
+    if( code == 'C' && chdir( filename ) != 0 )
+      { show_file_error( filename, chdir_msg, errno ); exit_fail_mt(); }
+    if( code ) continue;				// skip options
+    if( name.empty() ) continue;			// skip empty names
+    std::string deslashed;		// name without trailing slashes
+    unsigned len = name.size();
+    while( len > 1 && name[len-1] == '/' ) --len;	// a lone "/" is kept
+    if( len < name.size() )
+      { deslashed.assign( name, 0, len ); filename = deslashed.c_str(); }
+    if( Exclude::excluded( filename ) ) continue;	// skip excluded files
+    struct stat st;
+    if( lstat( filename, &st ) != 0 )	// filename from command line
+      { show_file_error( filename, cant_stat, errno ); set_error_status( 1 ); }
+    else if( nftw( filename, add_member_lz, 16,
+                   cl_opts.dereference ? 0 : FTW_PHYS ) != 0 )
+      exit_fail_mt();			// write error or OOM
+    else if( cl_opts.solidity == dsolid )	// end of group
+      courier.receive_packet( new Ipacket );
+    }
+
+  if( cl_opts.solidity == bsolid && partial_data_size )	// finish last block
+    { partial_data_size = 0; courier.receive_packet( new Ipacket ); }
+  courier.finish();			// no more packets to send
+  return 0;
+  }
+
+
+/* Write ibuf to encoder. To minimize dictionary size, do not read from
+ encoder until encoder's input buffer is full or finish is true.
+ Send opacket to courier and allocate new obuf each time obuf is full.
+ 'opos' is the number of bytes already stored in obuf; it is kept by the
+ caller between calls, so one packet can hold output from several calls.
+ If 'finish' is true, the current lzip member is finished, its last
+ packet is sent, and the encoder is restarted before returning.
+ Errors terminate the program (internal_error or exit_fail_mt).
+*/
+void loop_encode( const uint8_t * const ibuf, const int isize,
+ uint8_t * & obuf, int & opos, Packet_courier & courier,
+ LZ_Encoder * const encoder, const int worker_id,
+ const bool finish = false )
+ {
+ int ipos = 0; // bytes of ibuf already written to encoder
+ if( opos < 0 || opos > max_packet_size )
+ internal_error( "bad buffer index in loop_encode." );
+ while( true )
+ {
+ if( ipos < isize )
+ {
+ const int wr = LZ_compress_write( encoder, ibuf + ipos, isize - ipos );
+ if( wr < 0 ) internal_error( "library error (LZ_compress_write)." );
+ ipos += wr; // the encoder may accept only part of the data
+ }
+ if( ipos >= isize ) // ibuf is empty
+ { if( finish ) LZ_compress_finish( encoder ); else break; } // else return without reading output
+ const int rd =
+ LZ_compress_read( encoder, obuf + opos, max_packet_size - opos );
+ if( rd < 0 )
+ {
+ if( verbosity >= 0 )
+ std::fprintf( stderr, "LZ_compress_read error: %s\n",
+ LZ_strerror( LZ_compress_errno( encoder ) ) );
+ exit_fail_mt();
+ }
+ opos += rd;
+ // obuf is full or last opacket in lzip member
+ if( opos >= max_packet_size || LZ_compress_finished( encoder ) == 1 )
+ {
+ if( opos > max_packet_size )
+ internal_error( "opacket size exceeded in worker." );
+ courier.collect_packet( new Opacket( obuf, opos ), worker_id ); // the packet now refers to obuf
+ opos = 0; obuf = new( std::nothrow ) uint8_t[max_packet_size]; // fresh buffer for the next packet
+ if( !obuf ) { show_error( mem_msg2 ); exit_fail_mt(); }
+ if( LZ_compress_finished( encoder ) == 1 )
+ {
+ if( LZ_compress_restart_member( encoder, LLONG_MAX ) >= 0 ) break; // ready for the next member
+ show_error( "LZ_compress_restart_member failed." ); exit_fail_mt();
+ }
+ }
+ }
+ if( ipos > isize ) internal_error( "ipacket size exceeded in worker." );
+ if( ipos < isize ) internal_error( "input not fully consumed in worker." );
+ }
+
+
+struct Worker_arg // arguments passed to each cworker thread through its void pointer
+ {
+ Packet_courier * courier; // courier shared by all the threads
+ int dictionary_size; // first argument given to LZ_compress_open
+ int match_len_limit; // second argument given to LZ_compress_open
+ int worker_id; // index of this worker's queues in the courier
+ };
+
+
+/* Get ipackets from courier, compress headers and file data, and give the
+ opackets produced to courier.
+ Thread entry point; 'arg' points to a Worker_arg. The loop ends when
+ distribute_packet returns 0. A file that can't be opened sets the error
+ status and is skipped; other errors terminate the program through
+ exit_fail_mt. Always returns 0.
+*/
+extern "C" void * cworker( void * arg )
+ {
+ const Worker_arg & tmp = *(const Worker_arg *)arg;
+ Packet_courier & courier = *tmp.courier;
+ const int dictionary_size = tmp.dictionary_size;
+ const int match_len_limit = tmp.match_len_limit;
+ const int worker_id = tmp.worker_id;
+
+ LZ_Encoder * encoder = 0; // created when the first file arrives
+ uint8_t * data = 0; // output buffer being filled by loop_encode
+ Resizable_buffer rbuf; // extended header + data
+ if( !rbuf.size() ) { show_error( mem_msg2 ); exit_fail_mt(); } // presumably size 0 means allocation failed
+
+ int opos = 0; // bytes already stored in 'data'
+ bool flushed = true; // avoid producing empty lzip members
+ while( true )
+ {
+ const Ipacket * const ipacket = courier.distribute_packet( worker_id );
+ if( !ipacket ) break; // no more packets to process
+ if( ipacket->filename.empty() ) // end of group
+ {
+ if( !flushed ) // this lzip member is not empty
+ loop_encode( 0, 0, data, opos, courier, encoder, worker_id, true );
+ courier.collect_packet( new Opacket, worker_id ); // end of member token
+ flushed = true; delete ipacket; continue;
+ }
+
+ const char * const filename = ipacket->filename.c_str();
+ const int infd = ipacket->file_size ? open_instream( filename ) : -1; // empty files are not opened
+ if( ipacket->file_size && infd < 0 ) // can't read file data
+ { delete[] ipacket->header; delete ipacket->extended; delete ipacket;
+ set_error_status( 1 ); continue; } // skip file
+
+ flushed = false;
+ if( !encoder ) // init encoder just before using it
+ {
+ data = new( std::nothrow ) uint8_t[max_packet_size];
+ encoder = LZ_compress_open( dictionary_size, match_len_limit, LLONG_MAX );
+ if( !data || !encoder || LZ_compress_errno( encoder ) != LZ_ok )
+ {
+ if( !data || !encoder || LZ_compress_errno( encoder ) == LZ_mem_error )
+ show_error( mem_msg2 );
+ else
+ internal_error( "invalid argument to encoder." );
+ exit_fail_mt();
+ }
+ }
+
+ const int ebsize = ipacket->extended->format_block( rbuf ); // may be 0
+ if( ebsize < 0 )
+ { show_error( ipacket->extended->full_size_error() ); exit_fail_mt(); }
+ if( ebsize > 0 ) // compress extended block
+ loop_encode( rbuf.u8(), ebsize, data, opos, courier, encoder, worker_id );
+ // compress ustar header
+ loop_encode( ipacket->header, header_size, data, opos, courier,
+ encoder, worker_id );
+ delete[] ipacket->header; delete ipacket->extended; // headers are no longer needed
+
+ if( ipacket->file_size )
+ {
+ const long long bufsize = 32 * header_size;
+ uint8_t buf[bufsize];
+ long long rest = ipacket->file_size; // bytes of the file not yet read
+ while( rest > 0 )
+ {
+ int size = std::min( rest, bufsize );
+ const int rd = readblock( infd, buf, size );
+ rest -= rd;
+ if( rd != size ) // fewer bytes read than expected
+ {
+ show_atpos_error( filename, ipacket->file_size - rest, false );
+ close( infd ); exit_fail_mt();
+ }
+ if( rest == 0 ) // last read
+ {
+ const int rem = ipacket->file_size % header_size;
+ if( rem > 0 ) // pad the data with zeros to a multiple of header_size
+ { const int padding = header_size - rem;
+ std::memset( buf + size, 0, padding ); size += padding; }
+ }
+ // compress size bytes of file
+ loop_encode( buf, size, data, opos, courier, encoder, worker_id );
+ }
+ if( close( infd ) != 0 )
+ { show_file_error( filename, eclosf_msg, errno ); exit_fail_mt(); }
+ }
+ if( gcl_opts->warn_newer && archive_attrs.is_newer( filename ) )
+ { show_file_error( filename, "File is newer than the archive." );
+ set_error_status( 1 ); }
+ delete ipacket;
+ }
+ if( data ) delete[] data;
+ if( encoder && LZ_compress_close( encoder ) < 0 )
+ { show_error( "LZ_compress_close failed." ); exit_fail_mt(); }
+ return 0;
+ }
+
+
+/* Take from the courier the opackets produced by the workers, in the
+   order in which the courier delivers them, and write their data to the
+   output archive 'outfd'. Return when the courier delivers a null
+   packet. A write failure terminates the program through exit_fail_mt.
+*/
+void muxer( Packet_courier & courier, const int outfd )
+  {
+  for( const Opacket * packet = courier.deliver_packet(); packet != 0;
+       packet = courier.deliver_packet() )
+    {
+    const bool written =
+      writeblock_wrapper( outfd, packet->data, packet->size );
+    if( !written ) exit_fail_mt();
+    delete[] packet->data;		// buffer allocated by the worker
+    delete packet;
+    }
+  }
+
+} // end namespace
+
+
+/* Init the courier, then start the grouper and the workers and call the muxer.
+ When the muxer returns, the worker threads and the grouper thread are
+ joined, the End-Of-Archive records are written and 'outfd' is closed.
+ Returns 1 if the grouper thread can't be created; else returns the value
+ of final_exit_status. Other failures terminate through exit_fail_mt. */
+int encode_lz( const Cl_options & cl_opts, const char * const archive_namep,
+ const int outfd )
+ {
+ const int in_slots = 65536; // max small files (<=512B) in 64 MiB
+ const int num_workers = cl_opts.num_workers;
+ const int total_in_slots = ( INT_MAX / num_workers >= in_slots ) ?
+ num_workers * in_slots : INT_MAX; // clamp to INT_MAX instead of overflowing
+ const int dictionary_size = option_mapping[cl_opts.level].dictionary_size;
+ const int match_len_limit = option_mapping[cl_opts.level].match_len_limit;
+ gcl_opts = &cl_opts;
+
+ /* If an error happens after any threads have been started, exit must be
+ called before courier goes out of scope. */
+ Packet_courier courier( num_workers, total_in_slots, cl_opts.out_slots );
+ courierp = &courier; // needed by add_member_lz
+
+ Grouper_arg grouper_arg;
+ grouper_arg.cl_opts = &cl_opts;
+ grouper_arg.courier = &courier;
+
+ pthread_t grouper_thread;
+ int errcode = pthread_create( &grouper_thread, 0, grouper, &grouper_arg );
+ if( errcode )
+ { show_error( "Can't create grouper thread", errcode ); return 1; } // no thread is running yet
+
+ Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
+ pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
+ if( !worker_args || !worker_threads )
+ { show_error( mem_msg ); exit_fail_mt(); }
+ for( int i = 0; i < num_workers; ++i )
+ {
+ worker_args[i].courier = &courier;
+ worker_args[i].dictionary_size = dictionary_size;
+ worker_args[i].match_len_limit = match_len_limit;
+ worker_args[i].worker_id = i;
+ errcode = pthread_create( &worker_threads[i], 0, cworker, &worker_args[i] );
+ if( errcode )
+ { show_error( "Can't create worker threads", errcode ); exit_fail_mt(); }
+ }
+
+ muxer( courier, outfd ); // runs in this thread until all workers have exited
+
+ for( int i = num_workers - 1; i >= 0; --i )
+ {
+ errcode = pthread_join( worker_threads[i], 0 );
+ if( errcode )
+ { show_error( "Can't join worker threads", errcode ); exit_fail_mt(); }
+ }
+ delete[] worker_threads;
+ delete[] worker_args;
+
+ errcode = pthread_join( grouper_thread, 0 );
+ if( errcode )
+ { show_error( "Can't join grouper thread", errcode ); exit_fail_mt(); }
+
+ // write End-Of-Archive records
+ int retval = !write_eoa_records( outfd, true );
+
+ if( close( outfd ) != 0 && retval == 0 )
+ { show_file_error( archive_namep, eclosa_msg, errno ); retval = 1; }
+
+ if( cl_opts.debug_level & 1 ) // print the courier's statistics
+ std::fprintf( stderr,
+ "any worker tried to consume from grouper %8u times\n"
+ "any worker had to wait %8u times\n"
+ "muxer tried to consume from workers %8u times\n"
+ "muxer had to wait %8u times\n",
+ courier.icheck_counter,
+ courier.iwait_counter,
+ courier.ocheck_counter,
+ courier.owait_counter );
+
+ if( !courier.finished() ) internal_error( conofin_msg ); // some packet was left undelivered
+ return final_exit_status( retval );
+ }
diff --git a/decode.cc b/decode.cc
new file mode 100644
index 0000000..bcac4c8
--- /dev/null
+++ b/decode.cc
@@ -0,0 +1,533 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cctype>
+#include <cerrno>
+#include <cstdio>
+#include <fcntl.h>
+#include <stdint.h> // for lzlib.h
+#include <unistd.h>
+#include <utime.h>
+#include <sys/stat.h>
+#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \
+ !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__
+#include <sys/sysmacros.h> // for major, minor, makedev
+#else
+#include <sys/types.h> // for major, minor, makedev
+#endif
+#include <lzlib.h>
+
+#include "tarlz.h"
+#include "arg_parser.h"
+#include "lzip_index.h"
+#include "archive_reader.h"
+#include "decode.h"
+
+#ifndef O_DIRECTORY
+#define O_DIRECTORY 0
+#endif
+
+namespace {
+
+Resizable_buffer grbuf; // buffer passed to show_member_name by the functions below
+
+/* Avoid duplicate warnings.
+   With 'reset' false, show "Skipping to next header." and return true the
+   first time; return false without printing on later calls until a call
+   with 'reset' true re-arms the warning (that call returns false too). */
+bool skip_warn( const bool reset = false )
+  {
+  static bool skipping = false;		// true once the warning was shown
+
+  if( reset ) { skipping = false; return false; }
+  if( skipping ) return false;		// already warned
+  skipping = true;
+  show_error( "Skipping to next header." );
+  return true;
+  }
+
+
+void read_error( const Archive_reader & ar )
+ {
+ show_file_error( ar.ad.namep, ar.e_msg(), ar.e_code() );
+ if( ar.e_skip() ) skip_warn();
+ }
+
+
+/* Skip the data of the current member, if its type may carry data.
+   A read error is reported; the reader's error code is returned only if
+   the error is fatal. Return 0 in every other case. */
+int skip_member( Archive_reader & ar, const Extended & extended,
+                 const Typeflag typeflag )
+  {
+  if( !data_may_follow( typeflag ) ) return 0;	// nothing to skip
+  const int ret = ar.skip_member( extended );
+  if( ret == 0 ) return 0;
+  read_error( ar );
+  return ar.fatal() ? ret : 0;
+  }
+
+
+// print the diagnostics (to stderr) and the differences (to stdout)
+void emit_compare_msgs( const std::string & estr, const std::string & ostr )
+  {
+  if( !estr.empty() ) std::fputs( estr.c_str(), stderr );
+  if( !ostr.empty() )
+    { std::fputs( ostr.c_str(), stdout ); std::fflush( stdout ); }
+  }
+
+
+/* Compare one archive member with the file of the same name in the file
+   system. The metadata is compared first; the contents are compared only
+   for regular files with data whose type and size match.
+   Return 1 if the member name can't be shown, the reader's error code on
+   a fatal read error, else 0. */
+int compare_member( const Cl_options & cl_opts, Archive_reader & ar,
+                    const Extended & extended, const Tar_header header )
+  {
+  if( !show_member_name( extended, header, 1, grbuf ) ) return 1;
+  std::string estr, ostr;
+  const bool same_type_and_size =
+    compare_file_type( estr, ostr, cl_opts, extended, header );
+  emit_compare_msgs( estr, ostr );
+  if( extended.file_size() <= 0 ) return 0;	// no data to compare or skip
+  const Typeflag typeflag = (Typeflag)header[typeflag_o];
+  const bool has_contents =
+    ( typeflag == tf_regular || typeflag == tf_hiperf );
+  if( !has_contents || !same_type_and_size )
+    return skip_member( ar, extended, typeflag );
+
+  // compare file contents
+  const char * const filename = extended.path().c_str();
+  const int infd2 = open_instream( filename );
+  if( infd2 < 0 )
+    { set_error_status( 1 ); return skip_member( ar, extended, typeflag ); }
+  const int result = compare_file_contents( estr, ostr, ar,
+                       extended.file_size(), filename, infd2 );
+  if( result == 0 ) { emit_compare_msgs( estr, ostr ); return 0; }
+  read_error( ar );
+  return ar.fatal() ? result : 0;
+  }
+
+
+/* List one archive member: show its name, then skip its data.
+   Return 1 if the name can't be shown, else the result of skip_member. */
+int list_member( Archive_reader & ar,
+                 const Extended & extended, const Tar_header header )
+  {
+  const bool shown = show_member_name( extended, header, 0, grbuf );
+  if( !shown ) return 1;
+  const Typeflag typeflag = (Typeflag)header[typeflag_o];
+  return skip_member( ar, extended, typeflag );
+  }
+
+
+/* Extract the archive member described by 'extended' and 'header'.
+   A member whose name contains ".." is refused. Any existing file (or
+   empty dir) of the same name is removed first, and make_dirs is called
+   before the file is created according to its type. Problems with one
+   member set the error status and skip the member's data.
+   Return 0 to go on with the next member. Return nonzero if the member
+   name can't be shown, if the archive reader reports a fatal error, or if
+   the extracted file can't be written or closed.
+*/
+int extract_member( const Cl_options & cl_opts, Archive_reader & ar,
+                    const Extended & extended, const Tar_header header )
+  {
+  const char * const filename = extended.path().c_str();
+  const Typeflag typeflag = (Typeflag)header[typeflag_o];
+  if( contains_dotdot( filename ) )
+    {
+    show_file_error( filename, dotdot_msg );
+    return skip_member( ar, extended, typeflag );
+    }
+  mode_t mode = parse_octal( header + mode_o, mode_l );	// 12 bits
+  if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask();
+  int outfd = -1;		// stays -1 unless a regular file is created
+
+  if( !show_member_name( extended, header, 1, grbuf ) ) return 1;
+  // remove file (or empty dir) before extraction to prevent following links
+  std::remove( filename );
+  if( !make_dirs( filename ) )
+    {
+    show_file_error( filename, intdir_msg, errno );
+    set_error_status( 1 );
+    return skip_member( ar, extended, typeflag );
+    }
+
+  switch( typeflag )
+    {
+    case tf_regular:
+    case tf_hiperf:
+      outfd = open_outstream( filename );
+      if( outfd < 0 )
+        { set_error_status( 1 ); return skip_member( ar, extended, typeflag ); }
+      break;
+    case tf_link:
+    case tf_symlink:
+      {
+      const char * const linkname = extended.linkpath().c_str();
+      const bool hard = typeflag == tf_link;
+      if( ( hard && link( linkname, filename ) != 0 ) ||
+          ( !hard && symlink( linkname, filename ) != 0 ) )
+        {
+        print_error( errno, cantln_msg, hard ? "" : "sym", linkname, filename );
+        set_error_status( 1 );
+        }
+      } break;
+    case tf_directory:
+      if( mkdir( filename, mode ) != 0 && errno != EEXIST )
+        {
+        show_file_error( filename, mkdir_msg, errno );
+        set_error_status( 1 );
+        }
+      break;
+    case tf_chardev:
+    case tf_blockdev:
+      {
+      // dev_t, not unsigned: where dev_t is wider than unsigned, makedev
+      // stores part of large major/minor numbers in the upper bits
+      const dev_t dev =
+        makedev( parse_octal( header + devmajor_o, devmajor_l ),
+                 parse_octal( header + devminor_o, devminor_l ) );
+      const int dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
+      if( mknod( filename, dmode, dev ) != 0 )
+        {
+        show_file_error( filename, mknod_msg, errno );
+        set_error_status( 1 );
+        }
+      break;
+      }
+    case tf_fifo:
+      if( mkfifo( filename, mode ) != 0 )
+        {
+        show_file_error( filename, mkfifo_msg, errno );
+        set_error_status( 1 );
+        }
+      break;
+    default:
+      print_error( 0, uftype_msg, filename, typeflag );
+      set_error_status( 2 );
+      return skip_member( ar, extended, typeflag );
+    }
+
+  const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
+  errno = 0;		// so that a range failure below is not taken for EPERM
+  if( !islink &&
+      ( !uid_gid_in_range( extended.get_uid(), extended.get_gid() ) ||
+        chown( filename, extended.get_uid(), extended.get_gid() ) != 0 ) )
+    {
+    // ownership was not set: drop the set-id and sticky bits
+    if( outfd >= 0 ) mode &= ~( S_ISUID | S_ISGID | S_ISVTX );
+    // chown in many cases returns with EPERM, which can be safely ignored.
+    if( errno != EPERM && errno != EINVAL )
+      { show_file_error( filename, chown_msg, errno ); set_error_status( 1 ); }
+    }
+
+  if( outfd >= 0 ) fchmod( outfd, mode );		// ignore errors
+
+  if( data_may_follow( typeflag ) )
+    {
+    const int bufsize = 32 * header_size;
+    uint8_t buf[bufsize];
+    long long rest = extended.file_size();	// data bytes not yet copied
+    const int rem = rest % header_size;
+    const int padding = rem ? header_size - rem : 0;
+    while( rest > 0 )
+      {
+      // the last read includes the padding that follows the data
+      const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
+      const int ret = ar.read( buf, rsize );
+      if( ret != 0 )
+        {
+        read_error( ar );
+        if( outfd >= 0 )
+          {
+          if( cl_opts.keep_damaged )	// save the part that was read
+            { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) );
+              close( outfd ); }
+          else { close( outfd ); std::remove( filename ); }
+          }
+        if( ar.fatal() ) return ret; else return 0;
+        }
+      const int wsize = ( rest >= bufsize ) ? bufsize : rest;	// data only
+      if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize )
+        { show_file_error( filename, werr_msg, errno ); return 1; }
+      rest -= wsize;
+      }
+    }
+  if( outfd >= 0 && close( outfd ) != 0 )
+    { show_file_error( filename, eclosf_msg, errno ); return 1; }
+  if( !islink )
+    {
+    struct utimbuf t;
+    t.actime = extended.atime().sec();
+    t.modtime = extended.mtime().sec();
+    utime( filename, &t );			// ignore errors
+    }
+  return 0;
+  }
+
+
+/* Append the line "filename: msg" to 'ostr'.
+   Nothing is appended if verbosity is negative. */
+void format_file_diff( std::string & ostr, const char * const filename,
+                       const char * const msg )
+  {
+  if( verbosity < 0 ) return;
+  ostr.append( filename ).append( ": " ).append( msg ).append( 1, '\n' );
+  }
+
+
+// true if the parsed command line contains at least one option with code 'C'
+bool option_C_present( const Arg_parser & parser )
+  {
+  const int n = parser.arguments();
+  int i = 0;
+  while( i < n && parser.code( i ) != 'C' ) ++i;
+  return i < n;
+  }
+
+
+/* true if an option with code 'C' appears anywhere after the first
+   argument for which nonempty_arg is true */
+bool option_C_after_filename( const Arg_parser & parser )
+  {
+  const int n = parser.arguments();
+  int i = 0;
+  while( i < n && !nonempty_arg( parser, i ) ) ++i;	// find the first name
+  for( ++i; i < n; ++i )			// look at what follows it
+    if( parser.code( i ) == 'C' ) return true;
+  return false;
+  }
+
+} // end namespace
+
+
+/* Return the permission bits of the file mode creation mask of the
+   process. The mask is read on the first call only and the result is
+   cached for later calls. */
+mode_t get_umask()
+  {
+  static bool first_call = true;
+  static mode_t mask = 0;		// read once, cache the result
+
+  if( !first_call ) return mask;
+  first_call = false;
+  const mode_t current = umask( 0 );	// umask returns the previous mask
+  umask( current );			// put the mask back
+  mask = current & ( S_IRWXU | S_IRWXG | S_IRWXO );
+  return mask;
+  }
+
+
+bool compare_file_type( std::string & estr, std::string & ostr,
+ const Cl_options & cl_opts,
+ const Extended & extended, const Tar_header header )
+ { /* Compare the type and metadata of an archive member with those of the
+ file of the same name in the file system. A stat failure is formatted
+ into 'estr' and each difference found into 'ostr'. Any problem or
+ difference sets the error status to 1.
+ Return false if the type or the size differs (the caller then does not
+ compare the contents), else true. */
+ const char * const filename = extended.path().c_str();
+ const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ struct stat st;
+ bool diff = false, size_differs = false, type_differs = true; // type_differs is cleared only if stat works and the type matches
+ if( hstat( filename, &st, cl_opts.dereference ) != 0 )
+ format_file_error( estr, filename, "warning: can't stat", errno );
+ else if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
+ !S_ISREG( st.st_mode ) )
+ format_file_diff( ostr, filename, "Is not a regular file" );
+ else if( typeflag == tf_symlink && !S_ISLNK( st.st_mode ) )
+ format_file_diff( ostr, filename, "Is not a symlink" );
+ else if( typeflag == tf_chardev && !S_ISCHR( st.st_mode ) )
+ format_file_diff( ostr, filename, "Is not a character device" );
+ else if( typeflag == tf_blockdev && !S_ISBLK( st.st_mode ) )
+ format_file_diff( ostr, filename, "Is not a block device" );
+ else if( typeflag == tf_directory && !S_ISDIR( st.st_mode ) )
+ format_file_diff( ostr, filename, "Is not a directory" );
+ else if( typeflag == tf_fifo && !S_ISFIFO( st.st_mode ) )
+ format_file_diff( ostr, filename, "Is not a FIFO" );
+ else
+ {
+ type_differs = false;
+ if( typeflag != tf_symlink && !cl_opts.ignore_metadata ) // the mode of a symlink is not compared
+ {
+ const mode_t mode = parse_octal( header + mode_o, mode_l ); // 12 bits
+ if( mode != ( st.st_mode & ( S_ISUID | S_ISGID | S_ISVTX |
+ S_IRWXU | S_IRWXG | S_IRWXO ) ) )
+ { format_file_diff( ostr, filename, "Mode differs" ); diff = true; }
+ }
+ if( !cl_opts.ignore_ids && !cl_opts.ignore_metadata )
+ {
+ if( extended.get_uid() != (long long)st.st_uid )
+ { format_file_diff( ostr, filename, "Uid differs" ); diff = true; }
+ if( extended.get_gid() != (long long)st.st_gid )
+ { format_file_diff( ostr, filename, "Gid differs" ); diff = true; }
+ }
+ if( typeflag != tf_symlink )
+ {
+ if( typeflag != tf_directory && !cl_opts.ignore_metadata &&
+ extended.mtime().sec() != (long long)st.st_mtime )
+ {
+ if( (time_t)extended.mtime().sec() == st.st_mtime ) // equal only after conversion to time_t
+ { if( !cl_opts.ignore_overflow ) { diff = true;
+ format_file_diff( ostr, filename, "Mod time overflow" ); } }
+ else { diff = true;
+ format_file_diff( ostr, filename, "Mod time differs" ); }
+ }
+ if( ( typeflag == tf_regular || typeflag == tf_hiperf ) &&
+ extended.file_size() != st.st_size ) // don't compare contents
+ { format_file_diff( ostr, filename, "Size differs" ); size_differs = true; }
+ if( ( typeflag == tf_chardev || typeflag == tf_blockdev ) &&
+ ( parse_octal( header + devmajor_o, devmajor_l ) !=
+ (unsigned)major( st.st_rdev ) ||
+ parse_octal( header + devminor_o, devminor_l ) !=
+ (unsigned)minor( st.st_rdev ) ) )
+ { format_file_diff( ostr, filename, "Device number differs" ); diff = true; }
+ }
+ else // symlink: compare the link target
+ {
+ char * const buf = new char[st.st_size+1]; // one extra byte for the terminator
+ long len = readlink( filename, buf, st.st_size );
+ bool e = ( len != st.st_size ); // read failed or length is not st_size
+ if( !e )
+ {
+ while( len > 1 && buf[len-1] == '/' ) --len; // trailing '/'
+ buf[len] = 0;
+ if( extended.linkpath() != buf ) e = true;
+ }
+ delete[] buf;
+ if( e ) { format_file_diff( ostr, filename, "Symlink differs" ); diff = true; }
+ }
+ }
+ if( diff || size_differs || type_differs ) set_error_status( 1 );
+ return !( size_differs || type_differs ); // metadata differences alone still allow comparing contents
+ }
+
+/* Compare file_size bytes read from the archive reader 'ar' with the same
+   number of bytes read from the open descriptor infd2.
+   'estr' and 'ostr' are cleared first. A read error on infd2 is reported
+   in 'estr'; a short file or differing contents are reported in 'ostr'.
+   After the first difference the rest of the member is still read from
+   'ar' (including the padding of the last block) but is no longer compared.
+   infd2 is closed before returning and any difference sets error status 1.
+   Returns the code of a failed ar.read converted to bool, else false.
+   NOTE(review): because the return type is bool, a nonzero ar.read code is
+   collapsed to 1; compare_member_lz stores the result in an int and uses
+   it as the Trival retval. Confirm that losing the original code is
+   intended. */
+bool compare_file_contents( std::string & estr, std::string & ostr,
+ Archive_reader_base & ar, const long long file_size,
+ const char * const filename, const int infd2 )
+ {
+ long long rest = file_size;
+ const int rem = rest % header_size;
+ const int padding = rem ? header_size - rem : 0; // bytes needed to fill the last block
+ const int bufsize = 32 * header_size;
+ uint8_t buf1[bufsize];
+ uint8_t buf2[bufsize];
+ int retval = 0;
+ bool diff = false;
+ estr.clear(); ostr.clear();
+ while( rest > 0 )
+ {
+ const int rsize1 = ( rest >= bufsize ) ? bufsize : rest + padding; // archive side: data plus padding
+ const int rsize2 = ( rest >= bufsize ) ? bufsize : rest; // file side: data only
+ if( ( retval = ar.read( buf1, rsize1 ) ) != 0 ) { diff = true; break; }
+ if( !diff ) // once a difference is found, only the archive is read
+ {
+ const int rd = readblock( infd2, buf2, rsize2 );
+ if( rd != rsize2 )
+ {
+ if( errno ) format_file_error( estr, filename, "Read error", errno ); // presumably readblock leaves errno at 0 on plain EOF; confirm
+ else format_file_diff( ostr, filename, "EOF found in file" );
+ diff = true;
+ }
+ else
+ {
+ int i = 0; while( i < rsize2 && buf1[i] == buf2[i] ) ++i;
+ if( i < rsize2 )
+ { format_file_diff( ostr, filename, "Contents differ" ); diff = true; }
+ }
+ }
+ if( rest < bufsize ) break; // the last (partial) chunk has been read
+ rest -= rsize1;
+ }
+ close( infd2 );
+ if( diff ) set_error_status( 1 );
+ return retval;
+ }
+
+/* Decode the archive named by cl_opts.archive_name: list, compare, or
+   extract its members depending on cl_opts.program_mode.
+   The work is handed to decode_lz when workers are enabled, no -C option
+   follows a file name, and the archive is indexed with at least 2 lzip
+   members. Otherwise the archive is read serially, one tar header per
+   loop iteration.
+   Returns 1 on an early setup failure, the result of decode_lz, or the
+   value of final_exit_status for the serial path. */
+int decode( const Cl_options & cl_opts )
+ {
+ if( !grbuf.size() ) { show_error( mem_msg ); return 1; }
+ // open archive before changing working directory
+ const Archive_descriptor ad( cl_opts.archive_name );
+ if( ad.infd < 0 ) return 1;
+
+ const bool c_present = option_C_present( cl_opts.parser ) &&
+ cl_opts.program_mode != m_list;
+ const bool c_after_name = c_present &&
+ option_C_after_filename( cl_opts.parser );
+ // save current working directory for sequential decoding (NOTE(review): no close of chdir_fd is visible in this function; confirm it is released elsewhere or left to process exit)
+ const int chdir_fd = c_after_name ? open( ".", O_RDONLY | O_DIRECTORY ) : -1;
+ if( c_after_name && chdir_fd < 0 )
+ { show_error( "Can't save current working directory", errno ); return 1; }
+ if( c_present && !c_after_name ) // execute all -C options
+ for( int i = 0; i < cl_opts.parser.arguments(); ++i )
+ {
+ if( cl_opts.parser.code( i ) != 'C' ) continue;
+ const char * const dir = cl_opts.parser.argument( i ).c_str();
+ if( chdir( dir ) != 0 )
+ { show_file_error( dir, chdir_msg, errno ); return 1; }
+ }
+ /* Mark filenames to be compared, extracted or listed.
+ name_pending is of type char instead of bool to allow concurrent update. */
+ std::vector< char > name_pending( cl_opts.parser.arguments(), false );
+ for( int i = 0; i < cl_opts.parser.arguments(); ++i )
+ if( nonempty_arg( cl_opts.parser, i ) && // skip opts, empty names
+ !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) )
+ name_pending[i] = true;
+
+ /* multi-threaded --list is faster even with 1 thread and 1 file in archive
+ but multi-threaded --diff and --extract probably need at least 2 of each.
+ CWD is not per-thread; multi-threaded decode can't be used if a
+ -C option appears after a file name in the command line. */
+ if( cl_opts.num_workers > 0 && !c_after_name && ad.indexed &&
+ ad.lzip_index.members() >= 2 ) // 2 lzip members may be 1 file + EOA
+ return decode_lz( cl_opts, ad, name_pending );
+
+ Archive_reader ar( ad ); // serial reader
+ Extended extended; // metadata from extended records
+ int retval = 0;
+ bool prev_extended = false; // prev header was extended
+ while( true ) // process one tar header per iteration
+ {
+ Tar_header header;
+ const int ret = ar.read( header, header_size );
+ if( ret != 0 ) { read_error( ar ); if( ar.fatal() ) { retval = ret; break; } } // a nonfatal read error falls through to the skip path below
+ if( ret != 0 || !check_ustar_chksum( header ) ) // error or EOA
+ {
+ if( ret == 0 && block_is_zero( header, header_size ) ) // EOA
+ {
+ if( !prev_extended || cl_opts.permissive ) break;
+ show_file_error( ad.namep, fv_msg1 );
+ retval = 2; break;
+ }
+ if( skip_warn() && verbosity >= 2 )
+ std::fprintf( stderr, "ustar chksum = %07o\n", ustar_chksum( header ) );
+ set_error_status( 2 ); continue; // bad block: try the next one
+ }
+ skip_warn( true ); // reset warning
+
+ const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ if( typeflag == tf_global )
+ {
+ if( prev_extended && !cl_opts.permissive )
+ { show_file_error( ad.namep, fv_msg2 ); retval = 2; break; }
+ Extended dummy; // global headers are parsed and ignored
+ const int ret = ar.parse_records( dummy, header, grbuf, gblrec_msg, true );
+ if( ret != 0 )
+ { show_file_error( ad.namep, ar.e_msg(), ar.e_code() );
+ if( ar.fatal() ) { retval = ret; break; }
+ skip_warn(); set_error_status( ret ); }
+ continue;
+ }
+ if( typeflag == tf_extended )
+ {
+ if( prev_extended && !cl_opts.permissive )
+ { show_file_error( ad.namep, fv_msg3 ); retval = 2; break; }
+ const int ret = ar.parse_records( extended, header, grbuf, extrec_msg,
+ cl_opts.permissive );
+ if( ret != 0 )
+ { show_file_error( ad.namep, ar.e_msg(), ar.e_code() );
+ if( ar.fatal() ) { retval = ret; break; }
+ skip_warn(); extended.reset(); set_error_status( ret ); }
+ else if( !extended.crc_present() && cl_opts.missing_crc )
+ { show_file_error( ad.namep, miscrc_msg ); retval = 2; break; }
+ prev_extended = true; continue; // the next header completes this member
+ }
+ prev_extended = false;
+
+ extended.fill_from_ustar( header ); // copy metadata from header
+
+ try {
+ // members without name are skipped except when listing
+ if( check_skip_filename( cl_opts, name_pending, extended.path().c_str(),
+ chdir_fd ) ) retval = skip_member( ar, extended, typeflag );
+ else
+ {
+ print_removed_prefix( extended.removed_prefix );
+ if( cl_opts.program_mode == m_list )
+ retval = list_member( ar, extended, header );
+ else if( extended.path().empty() )
+ retval = skip_member( ar, extended, typeflag );
+ else if( cl_opts.program_mode == m_diff )
+ retval = compare_member( cl_opts, ar, extended, header );
+ else retval = extract_member( cl_opts, ar, extended, header );
+ }
+ }
+ catch( Chdir_error & ) { retval = 1; }
+ extended.reset(); // metadata applies to one member only
+ if( retval )
+ { show_error( "Error is not recoverable: exiting now." ); break; }
+ }
+
+ if( close( ad.infd ) != 0 && retval == 0 )
+ { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; }
+
+ if( retval == 0 ) // report the names still marked as pending
+ for( int i = 0; i < cl_opts.parser.arguments(); ++i )
+ if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] )
+ { show_file_error( cl_opts.parser.argument( i ).c_str(), nfound_msg );
+ retval = 1; }
+ return final_exit_status( retval, cl_opts.program_mode != m_diff );
+ }
diff --git a/decode.h b/decode.h
new file mode 100644
index 0000000..05d3072
--- /dev/null
+++ b/decode.h
@@ -0,0 +1,35 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/* True for typeflag values <= 0 or >= 7. Callers use the result to decide
+   whether to skip or read the data of a member.
+   NOTE(review): the bounds are the integers 0 and 7, while callers pass
+   the raw header byte cast to Typeflag. If the enumerators are the ASCII
+   characters '0'..'7', this is true for every usual typeflag; confirm
+   whether '0' and '7' were intended. */
+inline bool data_may_follow( const Typeflag typeflag )
+ { return typeflag <= 0 || typeflag >= 7; }
+
+// Return true if both ids keep their value after a round trip through
+// uid_t and gid_t respectively.
+inline bool uid_gid_in_range( const long long uid, const long long gid )
+  {
+  const bool uid_fits = ( (long long)( (uid_t)uid ) == uid );
+  const bool gid_fits = ( (long long)( (gid_t)gid ) == gid );
+  return uid_fits && gid_fits;
+  }
+
+const char * const dotdot_msg = "Contains a '..' component, skipping.";
+const char * const cantln_msg = "Can't %slink '%s' to '%s'"; // args: "" or "sym", link target, file name
+const char * const mkdir_msg = "Can't create directory";
+const char * const mknod_msg = "Can't create device node";
+const char * const mkfifo_msg = "Can't create FIFO file";
+const char * const uftype_msg = "%s: Unknown file type '%c', skipping."; // args: file name, typeflag
+const char * const chown_msg = "Can't change file owner";
+
+mode_t get_umask(); // returns the umask reduced to the S_IRWXU | S_IRWXG | S_IRWXO bits
+
+struct Chdir_error {}; // exception type; decode() catches it and sets retval = 1
diff --git a/decode_lz.cc b/decode_lz.cc
new file mode 100644
index 0000000..867ffa5
--- /dev/null
+++ b/decode_lz.cc
@@ -0,0 +1,765 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <queue>
+#include <pthread.h>
+#include <stdint.h> // for lzlib.h
+#include <unistd.h>
+#include <utime.h>
+#include <sys/stat.h>
+#if !defined __FreeBSD__ && !defined __OpenBSD__ && !defined __NetBSD__ && \
+ !defined __DragonFly__ && !defined __APPLE__ && !defined __OS2__
+#include <sys/sysmacros.h> // for major, minor, makedev
+#else
+#include <sys/types.h> // for major, minor, makedev
+#endif
+#include <lzlib.h>
+
+#include "tarlz.h"
+#include "arg_parser.h"
+#include "lzip_index.h"
+#include "archive_reader.h"
+#include "common_mutex.h"
+#include "decode.h"
+
+/* When a problem is detected by any worker:
+ - the worker requests mastership and returns.
+ - the courier discards new packets received or collected.
+ - the other workers return.
+ - the muxer drains the queue and returns. */
+
+namespace {
+
+const char * const other_msg = "Other worker found an error.";
+
+/* Message handed by a worker to Packet_courier and later delivered to
+   the muxer.
+   line is preformatted and newline terminated except for prefix, error.
+   ok with an empty line is a no-op.
+   A delivered packet with status member_done makes the courier move on to
+   the queue of the next worker (unless mastership has been granted). */
+struct Packet // member name and metadata or error message
+ {
+ enum Status { ok, member_done, diag, prefix, error1, error2 };
+
+ long member_id; // lzip member containing the header of this tar member
+ std::string line; // member name and metadata ready to print, if any
+ Status status; // diagnostics and errors go to stderr
+ int errcode; // for error
+ Packet( const long i, const char * const msg, const Status s, const int e )
+ : member_id( i ), line( msg ), status( s ), errcode( e ) {} // msg is copied into line
+ };
+
+
+/* Moves packets from the workers to the muxer.
+   Every worker has its own queue holding at most out_slots packets. The
+   muxer takes packets from the queue of deliver_worker_id and advances to
+   the next worker after each member_done packet. When a worker finds an
+   error, EOA, or a misaligned member it requests mastership; once granted,
+   packets from the other workers are rejected and delivery stays on the
+   queue of the master.
+   The courier owns the packets while they are queued. Queues, ids and
+   num_working are modified with omutex held. */
+class Packet_courier                    // moves packets around
+  {
+public:
+  unsigned ocheck_counter;              // calls to deliver_packet
+  unsigned owait_counter;               // waits inside deliver_packet
+private:
+  long error_member_id;         // first lzip member with error/misalign/eoa/eof
+  int deliver_worker_id;        // worker queue currently delivering packets
+  int master_worker_id;         // worker in charge if error/misalign/eoa/eof
+  std::vector< std::queue< const Packet * > > opacket_queues;
+  int num_working;              // number of workers still running
+  const int num_workers;        // number of workers
+  const unsigned out_slots;     // max output packets per queue
+  pthread_mutex_t omutex;
+  pthread_cond_t oav_or_exit;   // output packet available or all workers exited
+  std::vector< pthread_cond_t > slot_av;        // output slot available
+  pthread_cond_t check_master;  // signaled to re-evaluate mastership requests
+  bool eoa_found_;              // EOA blocks found
+
+  Packet_courier( const Packet_courier & );     // declared as private
+  void operator=( const Packet_courier & );     // declared as private
+
+public:
+  Packet_courier( const int workers, const int slots )
+    : ocheck_counter( 0 ), owait_counter( 0 ),
+      error_member_id( -1 ), deliver_worker_id( 0 ), master_worker_id( -1 ),
+      opacket_queues( workers ), num_working( workers ),
+      num_workers( workers ), out_slots( slots ), slot_av( workers ),
+      eoa_found_( false )
+    {
+    xinit_mutex( &omutex ); xinit_cond( &oav_or_exit );
+    for( unsigned i = 0; i < slot_av.size(); ++i ) xinit_cond( &slot_av[i] );
+    xinit_cond( &check_master );
+    }
+
+  ~Packet_courier()
+    {
+    xdestroy_cond( &check_master );
+    for( unsigned i = 0; i < slot_av.size(); ++i ) xdestroy_cond( &slot_av[i] );
+    xdestroy_cond( &oav_or_exit ); xdestroy_mutex( &omutex );
+    }
+
+  bool eoa_found() const { return eoa_found_; }
+  void report_eoa() { eoa_found_ = true; }
+
+  bool mastership_granted() const { return master_worker_id >= 0; }
+
+  /* Ask to become the master after finding error/misalign/eoa/eof in lzip
+     member 'member_id'. Blocks until the queue of this worker is the one
+     being delivered and is empty, or until mastership is granted to some
+     worker. On grant, every packet still queued is discarded.
+     Returns true if 'worker_id' is (or already was) the master. */
+  bool request_mastership( const long member_id, const int worker_id )
+    {
+    xlock( &omutex );
+    if( mastership_granted() )                  // already granted
+      { xunlock( &omutex ); return ( master_worker_id == worker_id ); }
+    if( error_member_id < 0 || error_member_id > member_id )
+      error_member_id = member_id;
+    while( !mastership_granted() && ( worker_id != deliver_worker_id ||
+           !opacket_queues[deliver_worker_id].empty() ) )
+      xwait( &check_master, &omutex );
+    if( !mastership_granted() && worker_id == deliver_worker_id &&
+        opacket_queues[deliver_worker_id].empty() )
+      {
+      master_worker_id = worker_id;             // grant mastership
+      /* Delete all packets. The queues hold pointers allocated by
+         collect_packet, so each packet is freed before it is popped;
+         popping alone would leak it.
+         NOTE(review): workers blocked in collect_packet waiting on
+         slot_av are not woken here; confirm that this cannot stall. */
+      for( int i = 0; i < num_workers; ++i )
+        while( !opacket_queues[i].empty() )
+          { delete opacket_queues[i].front(); opacket_queues[i].pop(); }
+      xbroadcast( &check_master );
+      xunlock( &omutex );
+      return true;
+      }
+    xunlock( &omutex );
+    return false;               // mastership granted to another worker
+    }
+
+  void worker_finished()
+    {
+    // notify muxer when last worker exits
+    xlock( &omutex );
+    if( --num_working == 0 ) xsignal( &oav_or_exit );
+    xunlock( &omutex );
+    }
+
+  /* Collect a packet from a worker.
+     If a packet is rejected, the worker must terminate.
+     A packet is rejected if another worker is the master, or if it belongs
+     to an lzip member after the first member with an error. Otherwise the
+     call blocks while the queue of the worker is full. */
+  bool collect_packet( const long member_id, const int worker_id,
+                       const char * const msg, const Packet::Status status,
+                       const int errcode = 0 )
+    {
+    const Packet * const opacket = new Packet( member_id, msg, status, errcode );
+    xlock( &omutex );
+    if( ( mastership_granted() && master_worker_id != worker_id ) ||
+        ( error_member_id >= 0 && error_member_id < opacket->member_id ) )
+      { xunlock( &omutex ); delete opacket; return false; }     // reject packet
+    while( opacket_queues[worker_id].size() >= out_slots )
+      xwait( &slot_av[worker_id], &omutex );
+    opacket_queues[worker_id].push( opacket );
+    if( worker_id == deliver_worker_id ) xsignal( &oav_or_exit );
+    xunlock( &omutex );
+    return true;
+    }
+
+  /* Deliver a packet to muxer.
+     If packet.status == Packet::member_done, move to next queue.
+     If packet.line.empty(), wait again (empty lzip member).
+     Returns 0 when the current queue is empty and all workers have exited.
+     The caller becomes the owner of the packet returned. */
+  const Packet * deliver_packet()
+    {
+    const Packet * opacket = 0;
+    xlock( &omutex );
+    ++ocheck_counter;
+    while( true )
+      {
+      while( opacket_queues[deliver_worker_id].empty() && num_working > 0 )
+        {
+        ++owait_counter;
+        if( !mastership_granted() && error_member_id >= 0 )
+          xbroadcast( &check_master );  // mastership requested not yet granted
+        xwait( &oav_or_exit, &omutex );
+        }
+      if( opacket_queues[deliver_worker_id].empty() ) break;
+      opacket = opacket_queues[deliver_worker_id].front();
+      opacket_queues[deliver_worker_id].pop();
+      // the queue was full before this pop: wake the worker waiting for a slot
+      if( opacket_queues[deliver_worker_id].size() + 1 == out_slots )
+        xsignal( &slot_av[deliver_worker_id] );
+      if( opacket->status == Packet::member_done && !mastership_granted() )
+        { if( ++deliver_worker_id >= num_workers ) deliver_worker_id = 0; }
+      if( !opacket->line.empty() ) break;
+      delete opacket; opacket = 0;      // nothing to print: fetch the next one
+      }
+    xunlock( &omutex );
+    return opacket;
+    }
+
+  bool finished()               // all packets delivered to muxer
+    {
+    if( num_working != 0 ) return false;
+    for( int i = 0; i < num_workers; ++i )
+      if( !opacket_queues[i].empty() ) return false;
+    return true;
+    }
+  };
+
+
+// Keeps the file name currently held by each worker so that two threads
+// never extract the same file at the same time.
+class Name_monitor
+  {
+  std::vector< unsigned > crc_vector;           // CRC of the name held by each worker
+  std::vector< std::string > name_vector;       // name held by each worker
+  pthread_mutex_t mutex;
+
+public:
+  Name_monitor( const int num_workers )
+    : crc_vector( num_workers ), name_vector( num_workers )
+    {
+    if( num_workers > 0 ) xinit_mutex( &mutex );
+    }
+
+  /* Record 'filename' as the name held by 'worker_id'.
+     Returns false, recording nothing, if another worker holds the same
+     name. */
+  bool reserve_name( const unsigned worker_id, const std::string & filename )
+    {
+    const uint8_t * const bytes = (const uint8_t *)filename.c_str();
+    const unsigned name_crc = crc32c.compute_crc( bytes, filename.size() );
+    bool taken = false;
+    xlock( &mutex );
+    // a zero CRC never matches; otherwise the CRCs are compared first and
+    // the names are compared only if the CRCs are equal
+    if( name_crc != 0 )
+      for( unsigned w = 0; w < crc_vector.size() && !taken; ++w )
+        taken = ( w != worker_id && crc_vector[w] == name_crc &&
+                  name_vector[w] == filename );
+    if( !taken )
+      { crc_vector[worker_id] = name_crc; name_vector[worker_id] = filename; }
+    xunlock( &mutex );
+    return !taken;
+    }
+  };
+
+
+struct Trival // triple result value
+ {
+ const char * msg;
+ int errcode;
+ int retval;
+ explicit Trival( const char * const s = 0, const int e = 0, const int r = 0 )
+ : msg( s ), errcode( e ), retval( r ) {}
+ };
+
+
+/* Skip the data of the current tar member, if data may follow its header,
+   and send a member_done packet when the reader is at the end of the lzip
+   member.
+   Returns the error of the reader if the skip fails, other_msg if the
+   courier rejects the packet, else an empty Trival. */
+Trival skip_member_lz( Archive_reader_i & ar, Packet_courier & courier,
+                       const Extended & extended, const long member_id,
+                       const int worker_id, const Typeflag typeflag )
+  {
+  if( data_may_follow( typeflag ) )
+    {
+    const int result = ar.skip_member( extended );
+    if( result != 0 ) return Trival( ar.e_msg(), ar.e_code(), result );
+    }
+  if( !ar.at_member_end() ) return Trival();
+  const bool accepted =
+    courier.collect_packet( member_id, worker_id, "", Packet::member_done );
+  if( !accepted ) return Trival( other_msg, 0, 1);
+  return Trival();
+  }
+
+/* Compare one tar member with the file named extended.path().
+   The member line (if verbosity allows), the stat diagnostics and the
+   differences found are sent to the courier as packets. If the courier
+   rejects a packet, Trival( other_msg, 0, 1 ) is returned.
+   Contents are compared only for tf_regular / tf_hiperf members of
+   nonzero size for which compare_file_type returned true; otherwise the
+   member data is skipped. */
+Trival compare_member_lz( const Cl_options & cl_opts,
+ Archive_reader_i & ar, Packet_courier & courier,
+ const Extended & extended, const Tar_header header,
+ Resizable_buffer & rbuf, const long member_id,
+ const int worker_id )
+ {
+ if( verbosity < 1 ) rbuf()[0] = 0; // empty line: no member name packet is sent below
+ else if( !format_member_name( extended, header, rbuf, verbosity > 1 ) )
+ return Trival( mem_msg, 0, 1 );
+ std::string estr, ostr;
+ const bool stat_differs =
+ !compare_file_type( estr, ostr, cl_opts, extended, header );
+ if( ( rbuf()[0] && !courier.collect_packet( member_id, worker_id, rbuf(),
+ Packet::ok ) ) ||
+ ( estr.size() && !courier.collect_packet( member_id, worker_id,
+ estr.c_str(), Packet::diag ) ) ||
+ ( ostr.size() && !courier.collect_packet( member_id, worker_id,
+ ostr.c_str(), Packet::ok ) ) ||
+ ( extended.file_size() <= 0 && ar.at_member_end() &&
+ !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) )
+ return Trival( other_msg, 0, 1 );
+ if( extended.file_size() <= 0 ) return Trival(); // no data to compare or skip
+ const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ if( ( typeflag != tf_regular && typeflag != tf_hiperf ) || stat_differs )
+ return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag );
+ // else compare file contents
+ const char * const filename = extended.path().c_str();
+ const int infd2 = open_instream( filename );
+ if( infd2 < 0 ) { set_error_status( 1 );
+ return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag ); }
+ const int ret = compare_file_contents( estr, ostr, ar, extended.file_size(),
+ filename, infd2 ); // compare_file_contents returns bool, so ret is 0 or 1 here
+ if( ret != 0 ) return Trival( ar.e_msg(), ar.e_code(), ret );
+ if( ( estr.size() && !courier.collect_packet( member_id, worker_id,
+ estr.c_str(), Packet::diag ) ) ||
+ ( ostr.size() && !courier.collect_packet( member_id, worker_id,
+ ostr.c_str(), Packet::ok ) ) ||
+ ( ar.at_member_end() &&
+ !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) ) )
+ return Trival( other_msg, 0, 1 );
+ return Trival();
+ }
+
+
+/* Format the line of one tar member, skip its data, and send the line to
+   the courier. The line is sent even if skipping the data failed; the
+   read error is returned afterwards.
+   Returns mem_msg if the name can't be formatted, other_msg if the courier
+   rejects the packet, the error of the reader, or an empty Trival. */
+Trival list_member_lz( Archive_reader_i & ar, Packet_courier & courier,
+                       const Extended & extended, const Tar_header header,
+                       Resizable_buffer & rbuf, const long member_id,
+                       const int worker_id )
+  {
+  if( verbosity >= 0 )
+    {
+    if( !format_member_name( extended, header, rbuf, verbosity > 0 ) )
+      return Trival( mem_msg, 0, 1 );
+    }
+  else rbuf()[0] = 0;
+
+  int skip_result = 0;                  // print name even on read error
+  if( data_may_follow( (Typeflag)header[typeflag_o] ) )
+    skip_result = ar.skip_member( extended );
+
+  const Packet::Status status =
+    ar.at_member_end() ? Packet::member_done : Packet::ok;
+  if( !courier.collect_packet( member_id, worker_id, rbuf(), status ) )
+    return Trival( other_msg, 0, 1 );
+  return ( skip_result != 0 ) ?
+         Trival( ar.e_msg(), ar.e_code(), skip_result ) : Trival();
+  }
+
+
+/* Extract the tar member described by 'extended' and 'header' from 'ar'.
+   Member lines and diagnostics are sent to the courier as packets; if the
+   courier rejects a packet, Trival( other_msg, 0, 1 ) is returned so that
+   the worker stops.
+   Members whose name contains '..', or whose name is held by another
+   worker, are skipped. A failure to create the file, link, directory,
+   device node or FIFO sets error status 1 and is not fatal; an unknown
+   typeflag sets error status 2 and the member is skipped.
+   A read error from the archive, or a write/close error on the output
+   file, is returned as a Trival with a nonzero retval.
+   Returns an empty Trival on success. */
+Trival extract_member_lz( const Cl_options & cl_opts,
+                          Archive_reader_i & ar, Packet_courier & courier,
+                          const Extended & extended, const Tar_header header,
+                          Resizable_buffer & rbuf, const long member_id,
+                          const int worker_id, Name_monitor & name_monitor )
+  {
+  const char * const filename = extended.path().c_str();
+  const Typeflag typeflag = (Typeflag)header[typeflag_o];
+  if( contains_dotdot( filename ) )
+    {
+    if( format_file_error( rbuf, filename, dotdot_msg ) &&
+        !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
+      return Trival( other_msg, 0, 1 );
+    return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag );
+    }
+  // skip member if another copy is already being extracted by another thread
+  if( !name_monitor.reserve_name( worker_id, extended.path() ) )
+    {
+    if( verbosity >= 3 && format_file_error( rbuf, filename,
+                          "Is being extracted by another thread, skipping." ) &&
+        !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
+      return Trival( other_msg, 0, 1 );
+    return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag );
+    }
+  mode_t mode = parse_octal( header + mode_o, mode_l );         // 12 bits
+  if( geteuid() != 0 && !cl_opts.preserve_permissions ) mode &= ~get_umask();
+  int outfd = -1;               // stays -1 for everything but regular files
+
+  if( verbosity >= 1 )
+    {
+    if( !format_member_name( extended, header, rbuf, verbosity > 1 ) )
+      return Trival( mem_msg, 0, 1 );
+    if( !courier.collect_packet( member_id, worker_id, rbuf(), Packet::ok ) )
+      return Trival( other_msg, 0, 1 );
+    }
+  /* Remove file before extraction to prevent following links.
+     Don't remove an empty dir because other thread may need it. */
+  if( typeflag != tf_directory ) std::remove( filename );
+  if( !make_dirs( filename ) )
+    {
+    if( format_file_error( rbuf, filename, intdir_msg, errno ) &&
+        !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
+      return Trival( other_msg, 0, 1 );
+    set_error_status( 1 );
+    return skip_member_lz( ar, courier, extended, member_id, worker_id, typeflag );
+    }
+
+  switch( typeflag )
+    {
+    case tf_regular:
+    case tf_hiperf:
+      outfd = open_outstream( filename, true, &rbuf );
+      if( outfd < 0 )
+        {
+        if( verbosity >= 0 &&
+            !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
+          return Trival( other_msg, 0, 1 );
+        set_error_status( 1 );
+        return skip_member_lz( ar, courier, extended, member_id, worker_id,
+                               typeflag );
+        }
+      break;
+    case tf_link:
+    case tf_symlink:
+      {
+      const char * const linkname = extended.linkpath().c_str();
+      const bool hard = typeflag == tf_link;
+      if( ( hard && link( linkname, filename ) != 0 ) ||
+          ( !hard && symlink( linkname, filename ) != 0 ) )
+        {
+        if( format_error( rbuf, errno, cantln_msg, hard ? "" : "sym",
+                          linkname, filename ) &&
+            !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
+          return Trival( other_msg, 0, 1 );
+        set_error_status( 1 );
+        }
+      } break;
+    case tf_directory:
+      {
+      struct stat st;
+      bool exists = ( stat( filename, &st ) == 0 );
+      if( exists && !S_ISDIR( st.st_mode ) )
+        { exists = false; std::remove( filename ); }
+      // EEXIST is accepted: the directory may have appeared meanwhile
+      if( !exists && mkdir( filename, mode ) != 0 && errno != EEXIST )
+        {
+        if( format_file_error( rbuf, filename, mkdir_msg, errno ) &&
+            !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
+          return Trival( other_msg, 0, 1 );
+        set_error_status( 1 );
+        }
+      } break;
+    case tf_chardev:
+    case tf_blockdev:
+      {
+      /* Keep the device number in a dev_t. makedev may set bits above bit
+         31 where dev_t is wider than unsigned, so storing the result in an
+         unsigned could truncate it before it reaches mknod. */
+      const dev_t dev =
+        makedev( parse_octal( header + devmajor_o, devmajor_l ),
+                 parse_octal( header + devminor_o, devminor_l ) );
+      const mode_t dmode = ( typeflag == tf_chardev ? S_IFCHR : S_IFBLK ) | mode;
+      if( mknod( filename, dmode, dev ) != 0 )
+        {
+        if( format_file_error( rbuf, filename, mknod_msg, errno ) &&
+            !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
+          return Trival( other_msg, 0, 1 );
+        set_error_status( 1 );
+        }
+      break;
+      }
+    case tf_fifo:
+      if( mkfifo( filename, mode ) != 0 )
+        {
+        if( format_file_error( rbuf, filename, mkfifo_msg, errno ) &&
+            !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
+          return Trival( other_msg, 0, 1 );
+        set_error_status( 1 );
+        }
+      break;
+    default:
+      if( format_error( rbuf, 0, uftype_msg, filename, typeflag ) &&
+          !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
+        return Trival( other_msg, 0, 1 );
+      set_error_status( 2 );
+      return skip_member_lz( ar, courier, extended, member_id, worker_id,
+                             typeflag );
+    }
+
+  const bool islink = ( typeflag == tf_link || typeflag == tf_symlink );
+  errno = 0;    // stays 0 if the ids are out of range and chown is not called
+  if( !islink &&
+      ( !uid_gid_in_range( extended.get_uid(), extended.get_gid() ) ||
+        chown( filename, extended.get_uid(), extended.get_gid() ) != 0 ) )
+    {
+    // the owner could not be set: drop the set-id and sticky bits
+    if( outfd >= 0 ) mode &= ~( S_ISUID | S_ISGID | S_ISVTX );
+    // chown in many cases returns with EPERM, which can be safely ignored.
+    if( errno != EPERM && errno != EINVAL )
+      {
+      if( format_file_error( rbuf, filename, chown_msg, errno ) &&
+          !courier.collect_packet( member_id, worker_id, rbuf(), Packet::diag ) )
+        return Trival( other_msg, 0, 1 );
+      set_error_status( 1 );
+      }
+    }
+
+  if( outfd >= 0 ) fchmod( outfd, mode );               // ignore errors
+
+  if( data_may_follow( typeflag ) )
+    {
+    const int bufsize = 32 * header_size;
+    uint8_t buf[bufsize];
+    long long rest = extended.file_size();
+    const int rem = rest % header_size;
+    const int padding = rem ? header_size - rem : 0;
+    while( rest > 0 )
+      {
+      // the last read also consumes the padding of the final block
+      const int rsize = ( rest >= bufsize ) ? bufsize : rest + padding;
+      const int ret = ar.read( buf, rsize );
+      if( ret != 0 )
+        {
+        if( outfd >= 0 )
+          {
+          if( cl_opts.keep_damaged )
+            { writeblock( outfd, buf, std::min( rest, (long long)ar.e_size() ) );
+              close( outfd ); }
+          else { close( outfd ); std::remove( filename ); }
+          }
+        return Trival( ar.e_msg(), ar.e_code(), ret );
+        }
+      const int wsize = ( rest >= bufsize ) ? bufsize : rest;
+      if( outfd >= 0 && writeblock( outfd, buf, wsize ) != wsize )
+        {
+        // format the message first, while errno still belongs to the write,
+        // then release the descriptor instead of leaving it open
+        format_file_error( rbuf, filename, werr_msg, errno );
+        close( outfd );
+        return Trival( rbuf(), 0, 1 );
+        }
+      rest -= wsize;
+      }
+    }
+  if( outfd >= 0 && close( outfd ) != 0 )
+    { format_file_error( rbuf, filename, eclosf_msg, errno );
+      return Trival( rbuf(), 0, 1 ); }
+  if( !islink )
+    {
+    struct utimbuf t;
+    t.actime = extended.atime().sec();
+    t.modtime = extended.mtime().sec();
+    utime( filename, &t );                      // ignore errors
+    }
+  if( ar.at_member_end() &&
+      !courier.collect_packet( member_id, worker_id, "", Packet::member_done ) )
+    return Trival( other_msg, 0, 1 );
+  return Trival();
+  }
+
+
+struct Worker_arg // arguments read by dworker through its void * parameter
+ {
+ const Cl_options * cl_opts;
+ const Archive_descriptor * ad;
+ Packet_courier * courier;
+ Name_monitor * name_monitor;
+ std::vector< char > * name_pending;
+ int worker_id; // index of the first lzip member processed by this worker
+ int num_workers; // stride between the lzip members processed by this worker
+ };
+
+
+/* Read lzip members from archive, decode their tar members, and give the
+ packets produced to courier.
+   'arg' points to a Worker_arg. The worker processes lzip members
+   worker_id, worker_id + num_workers, and so on.
+   On an error, on EOA, or when a tar member does not end at the end of
+   its lzip member, the worker requests mastership from the courier. Only
+   the worker that obtains it reports the error or keeps reading up to the
+   end of the archive data.
+   Always calls courier.worker_finished() before returning 0.
+*/
+extern "C" void * dworker( void * arg )
+ {
+ const Worker_arg & tmp = *(const Worker_arg *)arg;
+ const Cl_options & cl_opts = *tmp.cl_opts;
+ const Archive_descriptor & ad = *tmp.ad;
+ Packet_courier & courier = *tmp.courier;
+ Name_monitor & name_monitor = *tmp.name_monitor;
+ std::vector< char > & name_pending = *tmp.name_pending;
+ const int worker_id = tmp.worker_id;
+ const int num_workers = tmp.num_workers;
+
+ bool master = false;
+ Resizable_buffer rbuf;
+ Archive_reader_i ar( ad ); // 1 of N parallel readers
+ if( !rbuf.size() || ar.fatal() ) // no buffer or unusable reader: report mem_msg if granted mastership
+ { if( courier.request_mastership( worker_id, worker_id ) )
+ courier.collect_packet( worker_id, worker_id, mem_msg, Packet::error1 );
+ goto done; }
+
+ for( long i = worker_id; !master && i < ad.lzip_index.members(); i += num_workers ) // a master does not start another member
+ {
+ if( ad.lzip_index.dblock( i ).size() <= 0 ) // empty lzip member
+ {
+ if( courier.collect_packet( i, worker_id, "", Packet::member_done ) )
+ continue; else break;
+ }
+
+ long long data_end = ad.lzip_index.dblock( i ).end();
+ Extended extended; // metadata from extended records
+ bool prev_extended = false; // prev header was extended
+ ar.set_member( i ); // prepare for new member
+ while( true ) // process one tar header per iteration
+ {
+ if( ar.data_pos() >= data_end ) // dblock.end or udata_size
+ {
+ if( ar.data_pos() == data_end && !prev_extended ) break;
+ // member end exceeded or ends in extended, process rest of file
+ if( !courier.request_mastership( i, worker_id ) ) goto done;
+ master = true;
+ if( data_end >= ad.lzip_index.udata_size() ) // already at the end of the archive data
+ { courier.collect_packet( i, worker_id, end_msg, Packet::error2 );
+ goto done; }
+ data_end = ad.lzip_index.udata_size(); // from now on read up to the end of the archive data
+ if( ar.data_pos() == data_end && !prev_extended ) break;
+ }
+ Tar_header header;
+ const int ret = ar.read( header, header_size );
+ if( ret != 0 )
+ { if( courier.request_mastership( i, worker_id ) )
+ courier.collect_packet( i, worker_id, ar.e_msg(),
+ ( ret == 1 ) ? Packet::error1 : Packet::error2, ar.e_code() );
+ goto done; }
+ if( !check_ustar_chksum( header ) ) // error or EOA
+ {
+ if( !courier.request_mastership( i, worker_id ) ) goto done;
+ if( block_is_zero( header, header_size ) ) // EOA
+ {
+ if( !prev_extended || cl_opts.permissive ) courier.report_eoa();
+ else courier.collect_packet( i, worker_id, fv_msg1, Packet::error2 );
+ goto done;
+ }
+ courier.collect_packet( i, worker_id, ( ar.data_pos() > header_size ) ?
+ bad_hdr_msg : posix_lz_msg, Packet::error2 ); // posix_lz_msg if the bad block is the first one read from the member
+ goto done;
+ }
+
+ const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ if( typeflag == tf_global )
+ {
+ const char * msg = 0; int ret = 2;
+ Extended dummy; // global headers are parsed and ignored
+ if( prev_extended && !cl_opts.permissive ) msg = fv_msg2;
+ else if( ( ret = ar.parse_records( dummy, header, rbuf, gblrec_msg,
+ true ) ) != 0 ) msg = ar.e_msg();
+ else
+ {
+ if( ar.data_pos() == data_end && // end of lzip member or EOF
+ !courier.collect_packet( i, worker_id, "", Packet::member_done ) )
+ goto done;
+ continue;
+ }
+ if( courier.request_mastership( i, worker_id ) )
+ courier.collect_packet( i, worker_id, msg, ( ret == 1 ) ?
+ Packet::error1 : Packet::error2 );
+ goto done;
+ }
+ if( typeflag == tf_extended )
+ {
+ const char * msg = 0; int ret = 2;
+ if( prev_extended && !cl_opts.permissive ) msg = fv_msg3;
+ else if( ( ret = ar.parse_records( extended, header, rbuf, extrec_msg,
+ cl_opts.permissive ) ) != 0 ) msg = ar.e_msg();
+ else if( !extended.crc_present() && cl_opts.missing_crc )
+ { msg = miscrc_msg; ret = 2; }
+ else { prev_extended = true; continue; }
+ if( courier.request_mastership( i, worker_id ) )
+ courier.collect_packet( i, worker_id, msg, ( ret == 1 ) ?
+ Packet::error1 : Packet::error2 );
+ goto done;
+ }
+ prev_extended = false;
+
+ extended.fill_from_ustar( header ); // copy metadata from header
+
+ /* Skip members with an empty name in the ustar header. If there is an
+ extended header in a previous lzip member, its worker will request
+ mastership. Else the ustar-only unnamed member will be ignored. */
+ Trival trival;
+ if( check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) )
+ trival = skip_member_lz( ar, courier, extended, i, worker_id, typeflag );
+ else
+ {
+ std::string rpmsg;
+ if( print_removed_prefix( extended.removed_prefix, &rpmsg ) &&
+ !courier.collect_packet( i, worker_id, rpmsg.c_str(), Packet::prefix ) )
+ { trival = Trival( other_msg, 0, 1 ); goto fatal; }
+ if( cl_opts.program_mode == m_list )
+ trival = list_member_lz( ar, courier, extended, header, rbuf, i, worker_id );
+ else if( extended.path().empty() )
+ trival = skip_member_lz( ar, courier, extended, i, worker_id, typeflag );
+ else if( cl_opts.program_mode == m_diff )
+ trival = compare_member_lz( cl_opts, ar, courier, extended, header,
+ rbuf, i, worker_id );
+ else trival = extract_member_lz( cl_opts, ar, courier, extended, header,
+ rbuf, i, worker_id, name_monitor );
+ }
+ if( trival.retval ) // fatal error
+fatal: { if( courier.request_mastership( i, worker_id ) ) // also entered by the goto after a rejected prefix packet
+ courier.collect_packet( i, worker_id, trival.msg,
+ ( trival.retval == 1 ) ? Packet::error1 : Packet::error2,
+ trival.errcode );
+ goto done; }
+ extended.reset(); // metadata applies to one member only
+ }
+ }
+done:
+ courier.worker_finished();
+ return 0;
+ }
+
+
+/* Take from courier the processed and sorted packets, one at a time, and
+   print the member lines on stdout or the diagnostics and errors on stderr.
+   The first error packet ends the loop. If the queue drains without error
+   but no worker reported the EOA blocks, that is reported as an error too.
+   Any nonzero status is handed to exit_fail_mt.
+*/
+void muxer( const char * const archive_namep, Packet_courier & courier )
+  {
+  int status = 0;                 // set to 1 or 2 by the first error packet
+  const Packet * pkt = 0;
+  // a null packet means the queue is empty and all workers have exited
+  while( status == 0 && ( pkt = courier.deliver_packet() ) != 0 )
+    {
+    const char * const text = pkt->line.c_str();
+    if( pkt->status == Packet::error1 || pkt->status == Packet::error2 )
+      {
+      show_file_error( archive_namep, text, pkt->errcode );
+      status = ( pkt->status == Packet::error1 ) ? 1 : 2;
+      }
+    else if( pkt->status == Packet::prefix ) show_error( text );
+    else if( pkt->status == Packet::diag ) std::fputs( text, stderr );
+    else if( pkt->line.size() )   // any other status: non-empty line to stdout
+      { std::fputs( text, stdout ); std::fflush( stdout ); }
+    delete pkt;
+    }
+  if( status == 0 && !courier.eoa_found() )   // no worker found EOA blocks
+    { show_file_error( archive_namep, end_msg ); status = 2; }
+  if( status != 0 ) exit_fail_mt( status );
+  }
+
+} // end namespace
+
+
+/* Init the courier, then start the workers and call the muxer.
+   The number of worker threads is cl_opts.num_workers limited to the number
+   of lzip members in the index of 'ad'. After the muxer returns, the
+   workers are joined, ad.infd is closed, and every argument still marked in
+   'name_pending' is reported as not found.
+   Returns 1 if the per-worker arrays can't be allocated, else the value of
+   final_exit_status. Thread creation/join failures call exit_fail_mt.
+*/
+int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad,
+               std::vector< char > & name_pending )
+  {
+  const int out_slots = 65536;        // max small files (<=512B) in 64 MiB
+  const int num_workers =             // limited to number of members
+    std::min( (long)cl_opts.num_workers, ad.lzip_index.members() );
+  if( cl_opts.program_mode == m_extract ) get_umask();   // cache the umask
+  Name_monitor
+    name_monitor( ( cl_opts.program_mode == m_extract ) ? num_workers : 0 );
+
+  /* If an error happens after any threads have been started, exit must be
+     called before courier goes out of scope. */
+  Packet_courier courier( num_workers, out_slots );
+
+  Worker_arg * worker_args = new( std::nothrow ) Worker_arg[num_workers];
+  pthread_t * worker_threads = new( std::nothrow ) pthread_t[num_workers];
+  if( !worker_args || !worker_threads )
+    {
+    // No thread has been started yet, so a plain return is safe. Release
+    // whichever array was obtained (delete[] on a null pointer is a no-op).
+    delete[] worker_threads;
+    delete[] worker_args;
+    show_error( mem_msg ); return 1;
+    }
+  for( int i = 0; i < num_workers; ++i )
+    {
+    worker_args[i].cl_opts = &cl_opts;
+    worker_args[i].ad = &ad;
+    worker_args[i].courier = &courier;
+    worker_args[i].name_monitor = &name_monitor;
+    worker_args[i].name_pending = &name_pending;
+    worker_args[i].worker_id = i;
+    worker_args[i].num_workers = num_workers;
+    const int errcode =
+      pthread_create( &worker_threads[i], 0, dworker, &worker_args[i] );
+    if( errcode )
+      { show_error( "Can't create worker threads", errcode ); exit_fail_mt(); }
+    }
+
+  muxer( ad.namep, courier );   // prints packets until the queue is drained
+
+  for( int i = num_workers - 1; i >= 0; --i )
+    {
+    const int errcode = pthread_join( worker_threads[i], 0 );
+    if( errcode )
+      { show_error( "Can't join worker threads", errcode ); exit_fail_mt(); }
+    }
+  delete[] worker_threads;
+  delete[] worker_args;
+
+  int retval = 0;
+  if( close( ad.infd ) != 0 )
+    { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; }
+
+  // names given in the command line that matched no member
+  if( retval == 0 )
+    for( int i = 0; i < cl_opts.parser.arguments(); ++i )
+      if( nonempty_arg( cl_opts.parser, i ) && name_pending[i] )
+        { show_file_error( cl_opts.parser.argument( i ).c_str(), nfound_msg );
+          retval = 1; }
+
+  if( cl_opts.debug_level & 1 )
+    std::fprintf( stderr,
+      "muxer tried to consume from workers %8u times\n"
+      "muxer had to wait                   %8u times\n",
+      courier.ocheck_counter,
+      courier.owait_counter );
+
+  if( !courier.finished() ) internal_error( conofin_msg );
+  return final_exit_status( retval, cl_opts.program_mode != m_diff );
+  }
diff --git a/delete.cc b/delete.cc
new file mode 100644
index 0000000..6e54cf3
--- /dev/null
+++ b/delete.cc
@@ -0,0 +1,189 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cctype>
+#include <cerrno>
+#include <cstdio>
+#include <stdint.h> // for lzlib.h
+#include <unistd.h>
+#include <lzlib.h>
+
+#include "tarlz.h"
+#include "arg_parser.h"
+#include "lzip_index.h"
+#include "archive_reader.h"
+
+
+/* Position 'fd' at absolute offset 'pos'.
+   Returns true if lseek reports exactly 'pos'. Else shows seek_msg with
+   the current errno and returns false.
+*/
+bool safe_seek( const int fd, const long long pos )
+  {
+  const bool positioned = ( lseek( fd, pos, SEEK_SET ) == pos );
+  if( !positioned ) show_error( seek_msg, errno );
+  return positioned;
+  }
+
+
+/* Final step of an in-place deletion.
+   If 'istream_pos' > 0 and data remains after it in the input archive, seek
+   ad.infd to 'istream_pos' and pass the remaining byte count to copy_file
+   to move it to 'outfd'. Then truncate the output at its current position
+   if that is shorter than the original archive, close both descriptors,
+   and report every argument still marked in 'name_pending' as not found.
+   'retval' is the status accumulated by the caller. It is never lowered
+   here; it is raised to 1 if a new error is found while it is 0.
+   Returns the resulting status. On a failed tail copy it returns at once
+   without truncating or closing.
+*/
+int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad,
+               std::vector< char > & name_pending, const long long istream_pos,
+               const int outfd, int retval )
+  {
+  const long long rest = ad.lzip_index.file_size() - istream_pos;
+  if( istream_pos > 0 && rest > 0 &&
+      ( !safe_seek( ad.infd, istream_pos ) ||
+        !copy_file( ad.infd, outfd, rest ) ) )
+    { show_file_error( ad.namep, "Error during tail copy." );
+      return retval ? retval : 1; }
+  const long long ostream_pos = lseek( outfd, 0, SEEK_CUR );
+  if( ostream_pos < 0 )
+    {
+    show_file_error( ad.namep, seek_msg, errno );
+    // keep a more severe status (2) already set by the caller
+    if( retval < 1 ) retval = 1;
+    }
+  else if( ostream_pos > 0 && ostream_pos < ad.lzip_index.file_size() )
+    {
+    int ret;
+    do ret = ftruncate( outfd, ostream_pos );   // retry if interrupted
+      while( ret != 0 && errno == EINTR );
+    // the second test verifies that the file now ends at ostream_pos
+    if( ret != 0 || lseek( outfd, 0, SEEK_END ) != ostream_pos )
+      {
+      show_file_error( ad.namep, "Can't truncate archive", errno );
+      if( retval < 1 ) retval = 1;
+      }
+    }
+
+  // bitwise '|' so that both descriptors are closed even if one close fails
+  if( ( close( outfd ) | close( ad.infd ) ) != 0 && retval == 0 )
+    { show_file_error( ad.namep, eclosa_msg, errno ); retval = 1; }
+
+  if( retval == 0 )
+    for( int i = 0; i < parser.arguments(); ++i )
+      if( nonempty_arg( parser, i ) && name_pending[i] )
+        { show_file_error( parser.argument( i ).c_str(), nfound_msg );
+          retval = 1; }
+  return retval;
+  }
+
+
+/* Deleting from a corrupt archive must not worsen the corruption. Stop and
+   tail-copy as soon as corruption is found.
+
+   Delete in place, from the archive named in cl_opts, the members matching
+   the command-line arguments. The archive is opened twice: ad.infd for
+   reading and outfd for writing. Indexed (compressed) archives are handed
+   to delete_members_lz. Else the archive is read serially and, each time a
+   member is deleted, the data kept since the previous deletion is moved
+   down over the gap. Every exit after the setup checks goes through
+   tail_copy, which closes both descriptors. The setup error paths close
+   them here.
+   Returns 0 if nothing to delete, 1 or 2 on setup errors, else the status
+   returned by tail_copy (or by delete_members_lz).
+*/
+int delete_members( const Cl_options & cl_opts )
+  {
+  if( cl_opts.num_files <= 0 )
+    { if( verbosity >= 1 ) show_error( "Nothing to delete." ); return 0; }
+  if( cl_opts.archive_name.empty() )
+    { show_error( "Deleting from stdin not implemented yet." ); return 1; }
+  const Archive_descriptor ad( cl_opts.archive_name );
+  if( ad.infd < 0 ) return 1;
+  const int outfd = open_outstream( cl_opts.archive_name, false );
+  if( outfd < 0 ) { close( ad.infd ); return 1; }
+
+  // mark member names to be deleted
+  std::vector< char > name_pending( cl_opts.parser.arguments(), false );
+  for( int i = 0; i < cl_opts.parser.arguments(); ++i )
+    if( nonempty_arg( cl_opts.parser, i ) &&
+        !Exclude::excluded( cl_opts.parser.argument( i ).c_str() ) )
+      name_pending[i] = true;
+
+  if( ad.indexed )                // archive is a compressed regular file
+    return delete_members_lz( cl_opts, ad, name_pending, outfd );
+  if( !ad.seekable )
+    { show_file_error( ad.namep, "Archive is not seekable." );
+      close( outfd ); close( ad.infd ); return 1; }
+  if( ad.lzip_index.file_size() < 3 * header_size )
+    { show_file_error( ad.namep, has_lz_ext( ad.name ) ?
+                       posix_lz_msg : posix_msg );
+      close( outfd ); close( ad.infd ); return 2; }
+  // archive is uncompressed seekable, unless compressed corrupt
+
+  Archive_reader ar( ad );        // serial reader
+  Resizable_buffer rbuf;
+  long long istream_pos = 0;      // source of next data move
+  long long member_begin = 0;     // first pos of current tar member
+  Extended extended;              // metadata from extended records
+  int retval = 0;
+  bool prev_extended = false;     // prev header was extended
+  if( !rbuf.size() )
+    { show_error( mem_msg ); close( outfd ); close( ad.infd ); return 1; }
+
+  while( true )                   // process one tar header per iteration
+    {
+    // a member begins at its extended header, if it has one
+    if( !prev_extended && ( member_begin = lseek( ad.infd, 0, SEEK_CUR ) ) < 0 )
+      { show_file_error( ad.namep, seek_msg, errno ); retval = 1; break; }
+    Tar_header header;
+    if( ( retval = ar.read( header, header_size ) ) != 0 )
+      { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; }
+    if( !check_ustar_chksum( header ) )       // error or EOA
+      {
+      if( block_is_zero( header, header_size ) )      // EOA
+        {
+        if( prev_extended && !cl_opts.permissive )
+          { show_file_error( ad.namep, fv_msg1 ); retval = 2; }
+        break;
+        }
+      // posix format already checked by archive reader
+      show_file_error( ad.namep, bad_hdr_msg );
+      retval = 2; break;
+      }
+
+    const Typeflag typeflag = (Typeflag)header[typeflag_o];
+    if( typeflag == tf_global )
+      {
+      if( prev_extended && !cl_opts.permissive )
+        { show_file_error( ad.namep, fv_msg2 ); retval = 2; break; }
+      Extended dummy;             // global headers are parsed and ignored
+      retval = ar.parse_records( dummy, header, rbuf, gblrec_msg, true );
+      if( retval )
+        { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; }
+      continue;
+      }
+    if( typeflag == tf_extended )
+      {
+      if( prev_extended && !cl_opts.permissive )
+        { show_file_error( ad.namep, fv_msg3 ); retval = 2; break; }
+      if( ( retval = ar.parse_records( extended, header, rbuf, extrec_msg,
+                                       cl_opts.permissive ) ) != 0 )
+        { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); break; }
+      if( !extended.crc_present() && cl_opts.missing_crc )
+        { show_file_error( ad.namep, miscrc_msg ); retval = 2; break; }
+      prev_extended = true; continue;
+      }
+    prev_extended = false;
+
+    extended.fill_from_ustar( header );       // copy metadata from header
+
+    if( ( retval = ar.skip_member( extended ) ) != 0 )
+      { show_file_error( ad.namep, seek_msg, errno ); break; }
+
+    // delete tar member
+    if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) )
+      {
+      print_removed_prefix( extended.removed_prefix );
+      if( !show_member_name( extended, header, 1, rbuf ) )
+        { retval = 1; break; }
+      const long long pos = lseek( ad.infd, 0, SEEK_CUR );   // end of member
+      if( pos <= 0 || pos <= member_begin || member_begin < istream_pos )
+        { show_file_error( ad.namep, seek_msg, errno ); retval = 1; break; }
+      const long long size = member_begin - istream_pos;
+      if( size > 0 )      // move pending data each time a member is deleted
+        {
+        // before the first deletion the data is already in place; just
+        // advance the output position past it
+        if( istream_pos == 0 )
+          { if( !safe_seek( outfd, size ) ) { retval = 1; break; } }
+        else if( !safe_seek( ad.infd, istream_pos ) ||
+                 !copy_file( ad.infd, outfd, size ) ||
+                 !safe_seek( ad.infd, pos ) ) { retval = 1; break; }
+        }
+      istream_pos = pos;
+      }
+    extended.reset();
+    }
+
+  return tail_copy( cl_opts.parser, ad, name_pending, istream_pos, outfd, retval );
+  }
diff --git a/delete_lz.cc b/delete_lz.cc
new file mode 100644
index 0000000..b67efa0
--- /dev/null
+++ b/delete_lz.cc
@@ -0,0 +1,138 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cctype>
+#include <cerrno>
+#include <cstdio>
+#include <stdint.h> // for lzlib.h
+#include <unistd.h>
+#include <lzlib.h>
+
+#include "tarlz.h"
+#include "arg_parser.h"
+#include "lzip_index.h"
+#include "archive_reader.h"
+
+
+/* Deleting from a corrupt archive must not worsen the corruption. Stop and
+   tail-copy as soon as corruption is found.
+
+   Delete members in place from the indexed (compressed) archive 'ad'. A
+   tar member is deleted only if it starts at the beginning of the data of
+   lzip member 'i' and the reader is at the end of that lzip member after
+   skipping it. Else the member is reported as "not compressed
+   individually" and kept. Deletion works on whole lzip members: the data
+   kept since the previous deletion is moved down over the gap.
+   Takes over 'outfd' and ad.infd: every exit either goes through tail_copy,
+   which closes both, or closes them here.
+   Returns 1 if the reader or buffer can't be set up, else the status
+   returned by tail_copy.
+*/
+int delete_members_lz( const Cl_options & cl_opts,
+                       const Archive_descriptor & ad,
+                       std::vector< char > & name_pending,
+                       const int outfd )
+  {
+  Archive_reader_i ar( ad );              // indexed reader
+  Resizable_buffer rbuf;
+  if( !rbuf.size() || ar.fatal() )
+    { show_error( mem_msg ); close( outfd ); close( ad.infd ); return 1; }
+
+  long long istream_pos = 0;              // source of next data move
+  int retval = 0, retval2 = 0;            // retval2: non-fatal "can't delete"
+  for( long i = 0; i < ad.lzip_index.members(); ++i )
+    {
+    if( ad.lzip_index.dblock( i ).size() == 0 ) continue; // empty lzip member
+    long long member_begin = 0;           // first pos of current tar member
+    Extended extended;                    // metadata from extended records
+    bool prev_extended = false;           // prev header was extended
+    ar.set_member( i );                   // prepare for new member
+    while( true )                         // process one tar header per iteration
+      {
+      if( ar.data_pos() >= ar.mdata_end() )
+        {
+        if( ar.at_member_end() && !prev_extended ) break;
+        // member end exceeded or ends in extended
+        show_file_error( ad.namep, "Member misalignment found." );
+        retval = 2; goto done;
+        }
+      if( !prev_extended ) member_begin = ar.data_pos();
+      Tar_header header;
+      if( ( retval = ar.read( header, header_size ) ) != 0 )
+        { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; }
+      if( !check_ustar_chksum( header ) )         // error or EOA
+        {
+        if( block_is_zero( header, header_size ) )        // EOA
+          {
+          if( prev_extended && !cl_opts.permissive )
+            { show_file_error( ad.namep, fv_msg1 ); retval = 2; }
+          goto done;
+          }
+        // indexed archive reader does not check posix format
+        show_file_error( ad.namep, ( ar.data_pos() > header_size ) ?
+                         bad_hdr_msg : posix_lz_msg );
+        retval = 2;
+        goto done;
+        }
+
+      const Typeflag typeflag = (Typeflag)header[typeflag_o];
+      if( typeflag == tf_global )
+        {
+        if( prev_extended && !cl_opts.permissive )
+          { show_file_error( ad.namep, fv_msg2 ); retval = 2; goto done; }
+        Extended dummy;           // global headers are parsed and ignored
+        retval = ar.parse_records( dummy, header, rbuf, gblrec_msg, true );
+        if( retval )
+          { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; }
+        continue;
+        }
+      if( typeflag == tf_extended )
+        {
+        if( prev_extended && !cl_opts.permissive )
+          { show_file_error( ad.namep, fv_msg3 ); retval = 2; goto done; }
+        if( ( retval = ar.parse_records( extended, header, rbuf, extrec_msg,
+                                         cl_opts.permissive ) ) != 0 )
+          { show_file_error( ad.namep, ar.e_msg(), ar.e_code() ); goto done; }
+        if( !extended.crc_present() && cl_opts.missing_crc )
+          { show_file_error( ad.namep, miscrc_msg ); retval = 2; goto done; }
+        prev_extended = true; continue;
+        }
+      prev_extended = false;
+
+      extended.fill_from_ustar( header );         // copy metadata from header
+
+      if( ( retval = ar.skip_member( extended ) ) != 0 ) goto done;
+
+      // delete tar member
+      if( !check_skip_filename( cl_opts, name_pending, extended.path().c_str() ) )
+        {
+        print_removed_prefix( extended.removed_prefix );
+        // check that members match
+        if( member_begin != ad.lzip_index.dblock( i ).pos() || !ar.at_member_end() )
+          { show_file_error( extended.path().c_str(),
+                             "Can't delete: not compressed individually." );
+            retval2 = 2; extended.reset(); continue; }
+        if( !show_member_name( extended, header, 1, rbuf ) )
+          { retval = 1; goto done; }
+        const long long size = ad.lzip_index.mblock( i ).pos() - istream_pos;
+        if( size > 0 )    // move pending data each time a member is deleted
+          {
+          // before the first deletion the data is already in place; just
+          // advance the output position past it
+          if( istream_pos == 0 )
+            { if( !safe_seek( outfd, size ) ) { retval = 1; goto done; } }
+          else if( !safe_seek( ad.infd, istream_pos ) ||
+                   !copy_file( ad.infd, outfd, size ) ) { retval = 1; goto done; }
+          }
+        istream_pos = ad.lzip_index.mblock( i ).end();    // member end
+        }
+      extended.reset();
+      }
+    }
+done:
+  if( retval < retval2 ) retval = retval2;
+  // tail copy keeps trailing data
+  return tail_copy( cl_opts.parser, ad, name_pending, istream_pos, outfd, retval );
+  }
diff --git a/doc/tarlz.1 b/doc/tarlz.1
new file mode 100644
index 0000000..9d63da5
--- /dev/null
+++ b/doc/tarlz.1
@@ -0,0 +1,180 @@
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.2.
+.TH TARLZ "1" "January 2024" "tarlz 0.25" "User Commands"
+.SH NAME
+tarlz \- creates tar archives with multimember lzip compression
+.SH SYNOPSIS
+.B tarlz
+\fI\,operation \/\fR[\fI\,options\/\fR] [\fI\,files\/\fR]
+.SH DESCRIPTION
+Tarlz is a massively parallel (multi\-threaded) combined implementation of
+the tar archiver and the lzip compressor. Tarlz uses the compression library
+lzlib.
+.PP
+Tarlz creates tar archives using a simplified and safer variant of the POSIX
+pax format compressed in lzip format, keeping the alignment between tar
+members and lzip members. The resulting multimember tar.lz archive is
+backward compatible with standard tar tools like GNU tar, which treat it
+like any other tar.lz archive. Tarlz can append files to the end of such
+compressed archives.
+.PP
+Keeping the alignment between tar members and lzip members has two
+advantages. It adds an indexed lzip layer on top of the tar archive, making
+it possible to decode the archive safely in parallel. It also minimizes the
+amount of data lost in case of corruption.
+.PP
+The tarlz file format is a safe POSIX\-style backup format. In case of
+corruption, tarlz can extract all the undamaged members from the tar.lz
+archive, skipping over the damaged members, just like the standard
+(uncompressed) tar. Moreover, the option '\-\-keep\-damaged' can be used to
+recover as much data as possible from each damaged member, and lziprecover
+can be used to recover some of the damaged members.
+.SS "Operations:"
+.TP
+\fB\-\-help\fR
+display this help and exit
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+output version information and exit
+.TP
+\fB\-A\fR, \fB\-\-concatenate\fR
+append archives to the end of an archive
+.TP
+\fB\-c\fR, \fB\-\-create\fR
+create a new archive
+.TP
+\fB\-d\fR, \fB\-\-diff\fR
+find differences between archive and file system
+.TP
+\fB\-\-delete\fR
+delete files/directories from an archive
+.TP
+\fB\-r\fR, \fB\-\-append\fR
+append files to the end of an archive
+.TP
+\fB\-t\fR, \fB\-\-list\fR
+list the contents of an archive
+.TP
+\fB\-x\fR, \fB\-\-extract\fR
+extract files/directories from an archive
+.TP
+\fB\-z\fR, \fB\-\-compress\fR
+compress existing POSIX tar archives
+.TP
+\fB\-\-check\-lib\fR
+check version of lzlib and exit
+.SH OPTIONS
+.TP
+\fB\-B\fR, \fB\-\-data\-size=\fR<bytes>
+set target size of input data blocks [2x8=16 MiB]
+.TP
+\fB\-C\fR, \fB\-\-directory=\fR<dir>
+change to directory <dir>
+.TP
+\fB\-f\fR, \fB\-\-file=\fR<archive>
+use archive file <archive>
+.TP
+\fB\-h\fR, \fB\-\-dereference\fR
+follow symlinks; archive the files they point to
+.TP
+\fB\-n\fR, \fB\-\-threads=\fR<n>
+set number of (de)compression threads [2]
+.TP
+\fB\-o\fR, \fB\-\-output=\fR<file>
+compress to <file> ('\-' for stdout)
+.TP
+\fB\-p\fR, \fB\-\-preserve\-permissions\fR
+don't subtract the umask on extraction
+.TP
+\fB\-q\fR, \fB\-\-quiet\fR
+suppress all messages
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+verbosely list files processed
+.TP
+\fB\-0\fR .. \fB\-9\fR
+set compression level [default 6]
+.TP
+\fB\-\-uncompressed\fR
+don't compress the archive created
+.TP
+\fB\-\-asolid\fR
+create solidly compressed appendable archive
+.TP
+\fB\-\-bsolid\fR
+create per block compressed archive (default)
+.TP
+\fB\-\-dsolid\fR
+create per directory compressed archive
+.TP
+\fB\-\-no\-solid\fR
+create per file compressed archive
+.TP
+\fB\-\-solid\fR
+create solidly compressed archive
+.TP
+\fB\-\-anonymous\fR
+equivalent to '\-\-owner=root \-\-group=root'
+.TP
+\fB\-\-owner=\fR<owner>
+use <owner> name/ID for files added to archive
+.TP
+\fB\-\-group=\fR<group>
+use <group> name/ID for files added to archive
+.TP
+\fB\-\-exclude=\fR<pattern>
+exclude files matching a shell pattern
+.TP
+\fB\-\-ignore\-ids\fR
+ignore differences in owner and group IDs
+.TP
+\fB\-\-ignore\-metadata\fR
+compare only file size and file content
+.TP
+\fB\-\-ignore\-overflow\fR
+ignore mtime overflow differences on 32\-bit
+.TP
+\fB\-\-keep\-damaged\fR
+don't delete partially extracted files
+.TP
+\fB\-\-missing\-crc\fR
+exit with error status if missing extended CRC
+.TP
+\fB\-\-mtime=\fR<date>
+use <date> as mtime for files added to archive
+.TP
+\fB\-\-out\-slots=\fR<n>
+number of 1 MiB output packets buffered [64]
+.TP
+\fB\-\-warn\-newer\fR
+warn if any file is newer than the archive
+.PP
+If no archive is specified, tarlz tries to read it from standard input or
+write it to standard output.
+.PP
+Exit status: 0 for a normal exit, 1 for environmental problems
+(file not found, files differ, invalid command\-line options, I/O errors,
+etc), 2 to indicate a corrupt or invalid input file, 3 for an internal
+consistency error (e.g., bug) which caused tarlz to panic.
+.SH "REPORTING BUGS"
+Report bugs to lzip\-bug@nongnu.org
+.br
+Tarlz home page: http://www.nongnu.org/lzip/tarlz.html
+.SH COPYRIGHT
+Copyright \(co 2024 Antonio Diaz Diaz.
+Using lzlib 1.14\-rc1
+License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
+.br
+This is free software: you are free to change and redistribute it.
+There is NO WARRANTY, to the extent permitted by law.
+.SH "SEE ALSO"
+The full documentation for
+.B tarlz
+is maintained as a Texinfo manual. If the
+.B info
+and
+.B tarlz
+programs are properly installed at your site, the command
+.IP
+.B info tarlz
+.PP
+should give you access to the complete manual.
diff --git a/doc/tarlz.info b/doc/tarlz.info
new file mode 100644
index 0000000..25ba882
--- /dev/null
+++ b/doc/tarlz.info
@@ -0,0 +1,1287 @@
+This is tarlz.info, produced by makeinfo version 4.13+ from tarlz.texi.
+
+INFO-DIR-SECTION Archiving
+START-INFO-DIR-ENTRY
+* Tarlz: (tarlz). Archiver with multimember lzip compression
+END-INFO-DIR-ENTRY
+
+
+File: tarlz.info, Node: Top, Next: Introduction, Up: (dir)
+
+Tarlz Manual
+************
+
+This manual is for Tarlz (version 0.25, 3 January 2024).
+
+* Menu:
+
+* Introduction:: Purpose and features of tarlz
+* Invoking tarlz:: Command-line interface
+* Portable character set:: POSIX portable filename character set
+* File format:: Detailed format of the compressed archive
+* Amendments to pax format:: The reasons for the differences with pax
+* Program design:: Internal structure of tarlz
+* Multi-threaded decoding:: Limitations of parallel tar decoding
+* Minimum archive sizes:: Sizes required for full multi-threaded speed
+* Examples:: A small tutorial with examples
+* Problems:: Reporting bugs
+* Concept index:: Index of concepts
+
+
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This manual is free documentation: you have unlimited permission to copy,
+distribute, and modify it.
+
+
+File: tarlz.info, Node: Introduction, Next: Invoking tarlz, Prev: Top, Up: Top
+
+1 Introduction
+**************
+
+Tarlz is a massively parallel (multi-threaded) combined implementation of
+the tar archiver and the lzip compressor. Tarlz uses the compression
+library lzlib.
+
+ Tarlz creates tar archives using a simplified and safer variant of the
+POSIX pax format compressed in lzip format, keeping the alignment between
+tar members and lzip members. The resulting multimember tar.lz archive is
+backward compatible with standard tar tools like GNU tar, which treat it
+like any other tar.lz archive. Tarlz can append files to the end of such
+compressed archives.
+
+ Keeping the alignment between tar members and lzip members has two
+advantages. It adds an indexed lzip layer on top of the tar archive, making
+it possible to decode the archive safely in parallel. It also minimizes the
+amount of data lost in case of corruption. Compressing a tar archive with
+plzip may even double the number of files lost for each lzip member damaged
+because it does not keep the members aligned.
+
+ Tarlz can create tar archives with five levels of compression
+granularity: per file ('--no-solid'), per block ('--bsolid', default), per
+directory ('--dsolid'), appendable solid ('--asolid'), and solid
+('--solid'). It can also create uncompressed tar archives.
+
+Of course, compressing each file (or each directory) individually can't
+achieve a compression ratio as high as compressing solidly the whole tar
+archive, but it has the following advantages:
+
+ * The resulting multimember tar.lz archive can be decompressed in
+ parallel, multiplying the decompression speed.
+
+ * New members can be appended to the archive (by removing the
+ end-of-archive member), and unwanted members can be deleted from the
+ archive. Just like an uncompressed tar archive.
+
+ * It is a safe POSIX-style backup format. In case of corruption, tarlz
+ can extract all the undamaged members from the tar.lz archive,
+ skipping over the damaged members, just like the standard
+ (uncompressed) tar. Moreover, the option '--keep-damaged' can be used
+ to recover as much data as possible from each damaged member, and
+ lziprecover can be used to recover some of the damaged members.
+
+ * A multimember tar.lz archive is usually smaller than the corresponding
+ solidly compressed tar.gz archive, except when individually
+ compressing files smaller than about 32 KiB.
+
+ Tarlz protects the extended records with a Cyclic Redundancy Check (CRC)
+in a way compatible with standard tar tools. *Note crc32::.
+
+ Tarlz does not understand other tar formats like 'gnu', 'oldgnu',
+'star', or 'v7'. The command 'tarlz -t -f archive.tar.lz > /dev/null' can
+be used to check that the format of the archive is compatible with tarlz.
+
+
+File: tarlz.info, Node: Invoking tarlz, Next: Portable character set, Prev: Introduction, Up: Top
+
+2 Invoking tarlz
+****************
+
+The format for running tarlz is:
+
+ tarlz OPERATION [OPTIONS] [FILES]
+
+All operations except '--concatenate' and '--compress' operate on whole
+trees if any FILE is a directory. All operations except '--compress'
+overwrite output files without warning. If no archive is specified, tarlz
+tries to read it from standard input or write it to standard output. Tarlz
+refuses to read archive data from a terminal or write archive data to a
+terminal. Tarlz detects when the archive being created or enlarged is among
+the files to be archived, appended, or concatenated, and skips it.
+
+ Tarlz does not use absolute file names nor file names above the current
+working directory (perhaps changed by option '-C'). On archive creation or
+appending, tarlz archives the files specified, but removes from member names
+any leading and trailing slashes and any file name prefixes containing a
+'..' component. On extraction, leading and trailing slashes are also
+removed from member names, and archive members containing a '..' component
+in the file name are skipped. Tarlz does not follow symbolic links during
+extraction; not even symbolic links replacing intermediate directories.
+
+ On extraction and listing, tarlz removes leading './' strings from
+member names in the archive or given in the command line, so that
+'tarlz -xf foo ./bar baz' extracts members 'bar' and './baz' from archive
+'foo'.
+
+ If several compression levels or '--*solid' options are given, the last
+setting is used. For example '-9 --solid --uncompressed -1' is equivalent
+to '-1 --solid'.
+
+ Tarlz supports the following operations:
+
+'--help'
+ Print an informative help message describing the options and exit.
+
+'-V'
+'--version'
+ Print the version number of tarlz on the standard output and exit.
+ This version number should be included in all bug reports.
+
+'-A'
+'--concatenate'
+ Append one or more archives to the end of an archive. If no archive is
+ specified with the option '-f', concatenate the input archives to
+ standard output. All the archives involved must be regular (seekable)
+ files, and must be either all compressed or all uncompressed.
+ Compressed and uncompressed archives can't be mixed. Compressed
+ archives must be multimember lzip files with the two end-of-archive
+ blocks plus any zero padding contained in the last lzip member of each
+ archive. The intermediate end-of-archive blocks are removed as each
+ new archive is concatenated. If the archive is uncompressed, tarlz
+ parses tar headers until it finds the end-of-archive blocks. Exit with
+ status 0 without modifying the archive if no FILES have been specified.
+
+ Concatenating archives containing files in common results in two or
+ more tar members with the same name in the resulting archive, which
+ may produce nondeterministic behavior during multi-threaded extraction.
+ *Note mt-extraction::.
+
+'-c'
+'--create'
+ Create a new archive from FILES.
+
+'-d'
+'--diff'
+ Compare and report differences between archive and file system. For
+ each tar member in the archive, check that the corresponding file in
+ the file system exists and is of the same type (regular file,
+ directory, etc). Report on standard output the differences found in
+ type, mode (permissions), owner and group IDs, modification time, file
+ size, file contents (of regular files), target (of symlinks) and
+ device number (of block/character special files).
+
+ As tarlz removes leading slashes from member names, the option '-C' may
+ be used in combination with '--diff' when absolute file names were used
+ on archive creation: 'tarlz -C / -d'. Alternatively, tarlz may be run
+ from the root directory to perform the comparison.
+
+'--delete'
+ Delete files and directories from an archive in place. It currently can
+ delete only from uncompressed archives and from archives with files
+ compressed individually ('--no-solid' archives). Note that files of
+ about '--data-size' or larger are compressed individually even if
+ '--bsolid' is used, and can therefore be deleted. Tarlz takes care to
+ not delete a tar member unless it is possible to do so. For example it
+ won't try to delete a tar member that is not compressed individually.
+ Even in the case of finding a corrupt member after having deleted some
+ member(s), tarlz stops and copies the rest of the file as soon as
+ corruption is found, leaving it just as corrupt as it was, but not
+ worse.
+
+ To delete a directory without deleting the files under it, use
+ 'tarlz --delete -f foo --exclude='dir/*' dir'. Deleting in place may
+ be dangerous. A corrupt archive, a power cut, or an I/O error may cause
+ data loss.
+
+'-r'
+'--append'
+ Append files to the end of an archive. The archive must be a regular
+ (seekable) file either compressed or uncompressed. Compressed members
+ can't be appended to an uncompressed archive, nor vice versa. If the
+ archive is compressed, it must be a multimember lzip file with the two
+ end-of-archive blocks plus any zero padding contained in the last lzip
+ member of the archive. It is possible to append files to an archive
+ with a different compression granularity. Appending works as follows:
+ first the end-of-archive blocks are removed, then the new members are
+ appended, and finally two new end-of-archive blocks are appended to
+ the archive. If the archive is uncompressed, tarlz parses and skips
+ tar headers until it finds the end-of-archive blocks. Exit with status
+ 0 without modifying the archive if no FILES have been specified.
+
+ Appending files already present in the archive results in two or more
+ tar members with the same name, which may produce nondeterministic
+ behavior during multi-threaded extraction. *Note mt-extraction::.
+
+'-t'
+'--list'
+ List the contents of an archive. If FILES are given, list only the
+ FILES given.
+
+'-x'
+'--extract'
+ Extract files from an archive. If FILES are given, extract only the
+ FILES given. Else extract all the files in the archive. To extract a
+ directory without extracting the files under it, use
+ 'tarlz -xf foo --exclude='dir/*' dir'. Tarlz removes files and empty
+ directories unconditionally before extracting over them. Other than
+ that, it does not make any special effort to extract a file over an
+ incompatible type of file. For example, extracting a file over a
+ non-empty directory usually fails.
+
+'-z'
+'--compress'
+ Compress existing POSIX tar archives aligning the lzip members to the
+ tar members with choice of granularity ('--bsolid' by default,
+ '--dsolid' works like '--asolid'). Exit with error status 2 if any
+ input archive is an empty file. The input archives are kept unchanged.
+ Existing compressed archives are not overwritten. A hyphen '-' used as
+ the name of an input archive reads from standard input and writes to
+ standard output (unless the option '--output' is used). Tarlz can be
+ used as compressor for GNU tar by using a command like
+ 'tar -c -Hustar foo | tarlz -z -o foo.tar.lz'. Tarlz can be used as
+ compressor for zupdate (zutils) by using a command like
+ 'zupdate --lz="tarlz -z" foo.tar.gz'. Note that tarlz only works
+ reliably on archives without global headers, or with global headers
+ whose content can be ignored.
+
+ The compression is reversible, including any garbage present after the
+ end-of-archive blocks. Tarlz stops parsing after the first
+ end-of-archive block is found, and then compresses the rest of the
+ archive. Unless solid compression is requested, the end-of-archive
+ blocks are compressed in a lzip member separated from the preceding
+ members and from any non-zero garbage following the end-of-archive
+ blocks. '--compress' implies plzip argument style, not tar style. Each
+ input archive is compressed to a file with the extension '.lz' added
+ unless the option '--output' is used. When '--output' is used, only
+ one input archive can be specified. '-f' can't be used with
+ '--compress'.
+
+'--check-lib'
+ Compare the version of lzlib used to compile tarlz with the version
+ actually being used at run time and exit. Report any differences
+ found. Exit with error status 1 if differences are found. A mismatch
+ may indicate that lzlib is not correctly installed or that a different
+ version of lzlib has been installed after compiling tarlz. Exit with
+ error status 2 if LZ_API_VERSION and LZ_version_string don't match.
+ 'tarlz -v --check-lib' shows the version of lzlib being used and the
+ value of LZ_API_VERSION (if defined). *Note Library version:
+ (lzlib)Library version.
+
+
+ tarlz supports the following options: *Note Argument syntax:
+(arg_parser)Argument syntax.
+
+'-B BYTES'
+'--data-size=BYTES'
+ Set target size of input data blocks for the option '--bsolid'. *Note
+ --bsolid::. Valid values range from 8 KiB to 1 GiB. Default value is
+ two times the dictionary size, except for option '-0' where it
+ defaults to 1 MiB. *Note Minimum archive sizes::.
+
+'-C DIR'
+'--directory=DIR'
+ Change to directory DIR. When creating, appending, comparing, or
+ extracting, the position of each '-C' option in the command line is
+ significant; it changes the current working directory for the following
+ FILES until a new '-C' option appears in the command line. '--list'
+ and '--delete' ignore any '-C' options specified. DIR is relative to
+ the then current working directory, perhaps changed by a previous '-C'
+ option.
+
+ Note that a process can only have one current working directory (CWD).
+ Therefore multi-threading can't be used to create or decode an archive
+ if a '-C' option appears after a (relative) file name in the command
+ line. (All file names are made relative when decoding).
+
+'-f ARCHIVE'
+'--file=ARCHIVE'
+ Use archive file ARCHIVE. A hyphen '-' used as an ARCHIVE argument
+ reads from standard input or writes to standard output.
+
+'-h'
+'--dereference'
+ Follow symbolic links during archive creation, appending or comparison.
+ Archive or compare the files they point to instead of the links
+ themselves.
+
+'-n N'
+'--threads=N'
+ Set the number of (de)compression threads, overriding the system's
+ default. Valid values range from 0 to "as many as your system can
+ support". A value of 0 disables threads entirely. If this option is
+ not used, tarlz tries to detect the number of processors in the system
+ and use it as default value. 'tarlz --help' shows the system's default
+ value. See the note about multi-threading in the option '-C' above.
+
+ Note that the number of usable threads is limited during compression to
+ ceil( uncompressed_size / data_size ) (*note Minimum archive sizes::),
+ and during decompression to the number of lzip members in the tar.lz
+ archive, which you can find by running 'lzip -lv archive.tar.lz'.
+
+'-o FILE'
+'--output=FILE'
+ Write the compressed output to FILE. '-o -' writes the compressed
+ output to standard output. Currently '--output' only works with
+ '--compress'.
+
+'-p'
+'--preserve-permissions'
+ On extraction, set file permissions as they appear in the archive.
+ This is the default behavior when tarlz is run by the superuser. The
+ default for other users is to subtract the umask of the user running
+ tarlz from the permissions specified in the archive.
+
+'-q'
+'--quiet'
+ Quiet operation. Suppress all messages.
+
+'-v'
+'--verbose'
+ Verbosely list files processed. Further -v's (up to 4) increase the
+ verbosity level.
+
+'-0 .. -9'
+ Set the compression level for '--create', '--append', and
+ '--compress'. The default compression level is '-6'. Like lzip, tarlz
+ also minimizes the dictionary size of the lzip members it creates,
+ reducing the amount of memory required for decompression.
+
+ Level Dictionary size Match length limit
+ -0 64 KiB 16 bytes
+ -1 1 MiB 5 bytes
+ -2 1.5 MiB 6 bytes
+ -3 2 MiB 8 bytes
+ -4 3 MiB 12 bytes
+ -5 4 MiB 20 bytes
+ -6 8 MiB 36 bytes
+ -7 16 MiB 68 bytes
+ -8 24 MiB 132 bytes
+ -9 32 MiB 273 bytes
+
+'--uncompressed'
+ With '--create', don't compress the tar archive created. Create an
+ uncompressed tar archive instead. With '--append', don't compress the
+ new members appended to the tar archive. Compressed members can't be
+ appended to an uncompressed archive, nor vice versa. '--uncompressed'
+ can be omitted if it can be deduced from the archive name. (An
+ uncompressed archive name lacks a '.lz' or '.tlz' extension).
+
+'--asolid'
+ When creating or appending to a compressed archive, use appendable
+ solid compression. All the files being added to the archive are
+ compressed into a single lzip member, but the end-of-archive blocks
+ are compressed into a separate lzip member. This creates a solidly
+ compressed appendable archive. Solid archives can't be created nor
+ decoded in parallel.
+
+'--bsolid'
+ When creating or appending to a compressed archive, use block
+ compression. Tar members are compressed together in a lzip member
+ until they approximate a target uncompressed size. The size can't be
+ exact because each solidly compressed data block must contain an
+ integer number of tar members. Block compression is the default
+ because it improves compression ratio for archives with many files
+ smaller than the block size. This option allows tarlz to revert to
+ default behavior if, for example, it is invoked through an alias like
+ 'tar='tarlz --solid''. *Note --data-size::, to set the target block
+ size.
+
+'--dsolid'
+ When creating or appending to a compressed archive, compress each file
+ specified in the command line separately in its own lzip member, and
+ use solid compression for each directory specified in the command
+ line. The end-of-archive blocks are compressed into a separate lzip
+ member. This creates a compressed appendable archive with a separate
+ lzip member for each file or top-level directory specified.
+
+'--no-solid'
+ When creating or appending to a compressed archive, compress each file
+ separately in its own lzip member. The end-of-archive blocks are
+ compressed into a separate lzip member. This creates a compressed
+ appendable archive with a lzip member for each file.
+
+'--solid'
+ When creating or appending to a compressed archive, use solid
+ compression. The files being added to the archive, along with the
+ end-of-archive blocks, are compressed into a single lzip member. The
+ resulting archive is not appendable. No more files can be later
+ appended to the archive. Solid archives can't be created nor decoded
+ in parallel.
+
+'--anonymous'
+ Equivalent to '--owner=root --group=root'.
+
+'--owner=OWNER'
+ When creating or appending, use OWNER for files added to the archive.
+ If OWNER is not a valid user name, it is decoded as a decimal numeric
+ user ID.
+
+'--group=GROUP'
+ When creating or appending, use GROUP for files added to the archive.
+ If GROUP is not a valid group name, it is decoded as a decimal numeric
+ group ID.
+
+'--exclude=PATTERN'
+ Exclude files matching a shell pattern like '*.o'. A file is considered
+ to match if any component of the file name matches. For example, '*.o'
+ matches 'foo.o', 'foo.o/bar' and 'foo/bar.o'. If PATTERN contains a
+ '/', it matches a corresponding '/' in the file name. For example,
+ 'foo/*.o' matches 'foo/bar.o'. Multiple '--exclude' options can be
+ specified.
+
+'--ignore-ids'
+ Make '--diff' ignore differences in owner and group IDs. This option is
+ useful when comparing an '--anonymous' archive.
+
+'--ignore-metadata'
+ Make '--diff' ignore any differences in metadata (file permissions,
+ owner and group IDs, modification time). Compare only file type, file
+ size, and file content. This option is useful when file permissions
+ have not been fully restored because uid/gid changed on extraction.
+
+'--ignore-overflow'
+ Make '--diff' ignore differences in mtime caused by overflow on 32-bit
+ systems with a 32-bit time_t.
+
+'--keep-damaged'
+ Don't delete partially extracted files. If a decompression error
+ happens while extracting a file, keep the partial data extracted. Use
+ this option to recover as much data as possible from each damaged
+ member. It is recommended to run tarlz in single-threaded mode
+ ('--threads=0') when using this option.
+
+'--missing-crc'
+ Exit with error status 2 if the CRC of the extended records is
+ missing. When this option is used, tarlz detects any corruption in the
+ extended records (only limited by CRC collisions). But note that a
+ corrupt 'GNU.crc32' keyword, for example 'GNU.crc30', is reported as a
+ missing CRC instead of as a corrupt record. This misleading
+ 'Missing CRC' message is the consequence of a flaw in the POSIX pax
+ format; i.e., the lack of a mandatory check sequence of the extended
+ records. *Note crc32::.
+
+'--mtime=DATE'
+ When creating or appending, use DATE as the modification time for
+ files added to the archive instead of their actual modification times.
+ The value of DATE may be either '@' followed by the number of seconds
+ since (or before) the epoch, or a date in format
+ '[-]YYYY-MM-DD HH:MM:SS' or '[-]YYYY-MM-DDTHH:MM:SS', or the name of
+ an existing reference file starting with '.' or '/' whose modification
+ time is used. The time of day 'HH:MM:SS' in the date format is
+ optional and defaults to '00:00:00'. The epoch is
+ '1970-01-01 00:00:00 UTC'. Negative seconds or years define a
+ modification time before the epoch.
+
+'--out-slots=N'
+ Number of 1 MiB output packets buffered per worker thread during
+ multi-threaded creation or appending to compressed archives.
+ Increasing the number of packets may increase compression speed if the
+ files being archived are larger than 64 MiB compressed, but requires
+ more memory. Valid values range from 1 to 1024. The default value is
+ 64.
+
+'--warn-newer'
+ During archive creation, warn if any file being archived has a
+ modification time newer than the archive creation time. This option
+ may slow archive creation somewhat because it makes an extra call to
+ 'stat' after archiving each file, but it guarantees that file contents
+ were not modified during the creation of the archive. Note that the
+ file must be at least one second newer than the archive for it to be
+ detected as newer.
+
+
+ Exit status: 0 for a normal exit, 1 for environmental problems (file not
+found, files differ, invalid command-line options, I/O errors, etc), 2 to
+indicate a corrupt or invalid input file, 3 for an internal consistency
+error (e.g., bug) which caused tarlz to panic.
+
+
+File: tarlz.info, Node: Portable character set, Next: File format, Prev: Invoking tarlz, Up: Top
+
+3 POSIX portable filename character set
+***************************************
+
+The set of characters from which portable file names are constructed.
+
+ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+ a b c d e f g h i j k l m n o p q r s t u v w x y z
+ 0 1 2 3 4 5 6 7 8 9 . _ -
+
+ The last three characters are the period, underscore, and hyphen-minus
+characters, respectively.
+
+ File names are identifiers. Therefore, archiving works better when file
+names use only the portable character set without spaces added.
+
+
+File: tarlz.info, Node: File format, Next: Amendments to pax format, Prev: Portable character set, Up: Top
+
+4 File format
+*************
+
+In the diagram below, a box like this:
+
++---+
+| | <-- the vertical bars might be missing
++---+
+
+ represents one byte; a box like this:
+
++==============+
+| |
++==============+
+
+ represents a variable number of bytes or a fixed but large number of
+bytes (for example 512).
+
+
+ A tar.lz file consists of one or more lzip members (compressed data
+sets). The members simply appear one after another in the file, with no
+additional information before, between, or after them.
+
+ Each lzip member contains one or more tar members in a simplified POSIX
+pax interchange format. The only pax typeflag value supported by tarlz (in
+addition to the typeflag values defined by the ustar format) is 'x'. The
+pax format is an extension on top of the ustar format that removes the size
+limitations of the ustar format.
+
+ Each tar member contains one file archived, and is represented by the
+following sequence:
+
+ * An optional extended header block followed by one or more blocks that
+ contain the extended header records as if they were the contents of a
+ file; i.e., the extended header records are included as the data for
+ this header block. This header block is of the form described in pax
+ header block, with a typeflag value of 'x'.
+
+ * A header block in ustar format that describes the file. Any fields
+ defined in the preceding optional extended header records override the
+ associated fields in this header block for this file.
+
+ * Zero or more blocks that contain the contents of the file.
+
+ Each tar member must be contiguously stored in a lzip member for the
+parallel decoding operations like '--list' to work. If any tar member is
+split over two or more lzip members, the archive must be decoded
+sequentially. *Note Multi-threaded decoding::.
+
+ At the end of the archive file there are two 512-byte blocks filled with
+binary zeros, interpreted as an end-of-archive indicator. These EOA blocks
+are either compressed in a separate lzip member or compressed along with the
+tar members contained in the last lzip member. For a compressed archive to
+be recognized by tarlz as appendable, the last lzip member must contain
+between 512 and 32256 zeros alone (without any non-zero bytes).
+
+ The diagram below shows the correspondence between each tar member
+(formed by one or two headers plus optional data) in the tar archive and
+each lzip member in the resulting multimember tar.lz archive, when per file
+compression is used: *Note File format: (lzip)File format.
+
+tar
++========+======+=================+===============+========+======+========+
+| header | data | extended header | extended data | header | data | EOA |
++========+======+=================+===============+========+======+========+
+
+tar.lz
++===============+=================================================+========+
+| member | member | member |
++===============+=================================================+========+
+
+
+4.1 Pax header block
+====================
+
+The pax header block is identical to the ustar header block described below
+except that the typeflag has the value 'x' (extended). The field 'size' is
+the size of the extended header data in bytes. Most other fields in the pax
+header block are zeroed on archive creation to prevent trouble if the
+archive is read by an ustar tool, and are ignored by tarlz on archive
+extraction. *Note flawed-compat::.
+
+ Tarlz limits the size of the pax extended header data so that the whole
+header set (extended header + extended data + ustar header) can be read and
+decoded in a buffer of size INT_MAX.
+
+ The pax extended header data consists of one or more records, each of
+them constructed as follows:
+'"%d %s=%s\n", <length>, <keyword>, <value>'
+
+ The fields <length> and <keyword> in the record must be limited to the
+portable character set (*note Portable character set::). The field <length>
+contains the decimal length of the record in bytes, including the trailing
+newline. The field <value> is stored as-is, without conversion to UTF-8 nor
+any other transformation. The fields are separated by the ASCII characters
+space, equal-sign, and newline.
+
+ These are the <keyword> values currently supported by tarlz:
+
+'atime'
+ The signed decimal representation of the access time of the following
+ file in seconds since (or before) the epoch, obtained from the function
+ 'stat'. The atime record is created only for files with a modification
+ time outside of the ustar range. *Note ustar-mtime::.
+
+'gid'
+ The unsigned decimal representation of the group ID of the group that
+ owns the following file. The gid record is created only for files with
+ a group ID greater than 2_097_151 (octal 7_777_777). *Note
+ ustar-uid-gid::.
+
+'linkpath'
+ The file name of a link being created to another file, of any type,
+ previously archived. This record overrides the field 'linkname' in the
+ following ustar header block. The following ustar header block
+ determines the type of link created. If typeflag of the following
+ header block is 1, a hard link is created. If typeflag is 2, a
+ symbolic link is created and the linkpath value is used as the
+ contents of the symbolic link. The linkpath record is created only for
+ links with a link name that does not fit in the space provided by the
+ ustar header.
+
+'mtime'
+ The signed decimal representation of the modification time of the
+ following file in seconds since (or before) the epoch, obtained from
+ the function 'stat'. This record overrides the field 'mtime' in the
+ following ustar header block. The mtime record is created only for
+ files with a modification time outside of the ustar range. *Note
+ ustar-mtime::.
+
+'path'
+ The file name of the following file. This record overrides the fields
+ 'name' and 'prefix' in the following ustar header block. The path
+ record is created for files with a name that does not fit in the space
+ provided by the ustar header, but is also created for files that
+ require any other extended record so that the fields 'name' and
+ 'prefix' in the following ustar header block can be zeroed.
+
+'size'
+ The size of the file in bytes, expressed as a decimal number using
+ digits from the ISO/IEC 646:1991 (ASCII) standard. This record
+ overrides the field 'size' in the following ustar header block. The
+ size record is created only for files with a size value greater than
+ 8_589_934_591 (octal 77_777_777_777); that is, 8 GiB (2^33 bytes) or
+ larger.
+
+'uid'
+ The unsigned decimal representation of the user ID of the file owner
+ of the following file. The uid record is created only for files with a
+ user ID greater than 2_097_151 (octal 7_777_777). *Note
+ ustar-uid-gid::.
+
+'GNU.crc32'
+ CRC32-C (Castagnoli) of the extended header data excluding the 8 bytes
+ representing the CRC <value> itself. The <value> is represented as 8
+ hexadecimal digits in big endian order, '22 GNU.crc32=00000000\n'. The
+ keyword of the CRC record is protected by the CRC to guarantee that
+ corruption is always detected when using '--missing-crc' (except in
+ case of CRC collision). A CRC was chosen because a checksum is too
+ weak for a potentially large list of variable sized records. A
+ checksum can't detect simple errors like the swapping of two bytes.
+
+
+ At verbosity level 1 or higher tarlz prints a diagnostic for each unknown
+extended header keyword found in an archive, once per keyword.
+
+
+4.2 Ustar header block
+======================
+
+The ustar header block has a length of 512 bytes and is structured as shown
+in the following table. All lengths and offsets are in decimal.
+
+Field Name Offset Length (in bytes)
+name 0 100
+mode 100 8
+uid 108 8
+gid 116 8
+size 124 12
+mtime 136 12
+chksum 148 8
+typeflag 156 1
+linkname 157 100
+magic 257 6
+version 263 2
+uname 265 32
+gname 297 32
+devmajor 329 8
+devminor 337 8
+prefix 345 155
+
+ All characters in the header block are coded using the ISO/IEC 646:1991
+(ASCII) standard, except in fields storing names for files, users, and
+groups. For maximum portability between implementations, names should only
+contain characters from the portable character set (*note Portable
+character set::), but if an implementation supports the use of characters
+outside of '/' and the portable character set in names for files, users,
+and groups, tarlz will use the byte values in these names unmodified.
+
+ The fields 'name', 'linkname', and 'prefix' are null-terminated
+character strings except when all characters in the array contain non-null
+characters including the last character.
+
+ The fields 'name' and 'prefix' produce the file name. A new file name is
+formed, if prefix is not an empty string (its first character is not null),
+by concatenating prefix (up to the first null character), a slash
+character, and name; otherwise, name is used alone. In either case, name is
+terminated at the first null character. If prefix begins with a null
+character, it is ignored. In this manner, file names of at most 256
+characters can be supported. If a file name does not fit in the space
+provided, an extended record is used to store the file name.
+
+ The field 'linkname' does not use the prefix to produce a file name. If
+the link name does not fit in the 100 characters provided, an extended
+record is used to store the link name.
+
+ The field 'mode' provides 12 access permission bits. The following table
+shows the symbolic name of each bit and its octal value:
+
+Bit Name Value Bit Name Value Bit Name Value
+---------------------------------------------------
+S_ISUID 04000 S_ISGID 02000 S_ISVTX 01000
+S_IRUSR 00400 S_IWUSR 00200 S_IXUSR 00100
+S_IRGRP 00040 S_IWGRP 00020 S_IXGRP 00010
+S_IROTH 00004 S_IWOTH 00002 S_IXOTH 00001
+
+ The fields 'uid' and 'gid' are the user and group IDs of the owner and
+group of the file, respectively. If the file uid or gid are greater than
+2_097_151 (octal 7_777_777), an extended record is used to store the uid or
+gid.
+
+ The field 'size' contains the octal representation of the size of the
+file in bytes. If the field 'typeflag' specifies a file of type '0'
+(regular file) or '7' (high performance regular file), the number of logical
+records following the header is (size / 512) rounded to the next integer.
+For all other values of typeflag, tarlz either sets the size field to 0 or
+ignores it, and does not store or expect any logical records following the
+header. If the file size is larger than 8_589_934_591 bytes
+(octal 77_777_777_777), an extended record is used to store the file size.
+
+ The field 'mtime' contains the octal representation of the modification
+time of the file at the time it was archived, obtained from the function
+'stat'. If the modification time is negative or larger than 8_589_934_591
+(octal 77_777_777_777) seconds since the epoch, an extended record is used
+to store the modification time. The ustar range of mtime goes from
+'1970-01-01 00:00:00 UTC' to '2242-03-16 12:56:31 UTC'.
+
+ The field 'chksum' contains the octal representation of the value of the
+simple sum of all bytes in the header logical record. Each byte in the
+header is treated as an unsigned value. When calculating the checksum, the
+chksum field is treated as if it were all space characters.
+
+ The field 'typeflag' contains a single character specifying the type of
+file archived:
+
+''0''
+ Regular file.
+
+''1''
+ Hard link to another file, of any type, previously archived. Hard
+ links must not contain file data.
+
+''2''
+ Symbolic link.
+
+''3', '4''
+ Character special file and block special file respectively. In this
+ case the fields 'devmajor' and 'devminor' contain information defining
+ the device in unspecified format.
+
+''5''
+ Directory.
+
+''6''
+ FIFO special file.
+
+''7''
+ Reserved to represent a file to which an implementation has associated
+ some high-performance attribute (contiguous file). Tarlz treats this
+ type of file as a regular file (type 0).
+
+
+ The field 'magic' contains the ASCII null-terminated string "ustar". The
+field 'version' contains the characters "00" (0x30,0x30). The fields
+'uname' and 'gname' are null-terminated character strings except when all
+characters in the array contain non-null characters including the last
+character. Each numeric field contains a leading space- or zero-filled,
+optionally null-terminated octal number using digits from the ISO/IEC
+646:1991 (ASCII) standard. Tarlz is able to decode numeric fields 1 byte
+longer than standard ustar by not requiring a terminating null character.
+
+
+File: tarlz.info, Node: Amendments to pax format, Next: Program design, Prev: File format, Up: Top
+
+5 The reasons for the differences with pax
+******************************************
+
+Tarlz creates safe archives that allow the reliable detection of invalid or
+corrupt metadata during decoding even when the integrity checking of lzip
+can't be used because the lzip members are only decompressed partially, as
+happens in parallel '--diff', '--list', and '--extract'. In order to
+achieve this goal and avoid some other flaws in the pax format, tarlz makes
+some changes to the variant of the pax format that it uses. This chapter
+describes these changes and the concrete reasons to implement them.
+
+
+5.1 Add a CRC of the extended records
+=====================================
+
+The POSIX pax format has a serious flaw. The metadata stored in pax extended
+records are not protected by any kind of check sequence. Corruption in a
+long file name may cause the extraction of the file in the wrong place
+without warning. Corruption in a large file size may cause the truncation of
+the file or the appending of garbage to the file, both followed by a
+spurious warning about a corrupt header far from the place of the undetected
+corruption.
+
+ Metadata like file name and file size must be always protected in an
+archive format because of the adverse effects of undetected corruption in
+them, potentially much worse than undetected corruption in the data. Even
+more so in the case of pax because the amount of metadata it stores is
+potentially large, making undetected corruption and archiver misbehavior
+more probable.
+
+ Headers and metadata must be protected separately from data because the
+integrity checking of lzip may not be able to detect the corruption before
+the metadata have been used, for example, to create a new file in the wrong
+place.
+
+ Because of the above, tarlz protects the extended records with a Cyclic
+Redundancy Check (CRC) in a way compatible with standard tar tools. *Note
+key_crc32::.
+
+
+5.2 Remove flawed backward compatibility
+========================================
+
+In order to allow the extraction of pax archives by a tar utility conforming
+to the POSIX-2:1993 standard, POSIX.1-2008 recommends selecting extended
+header field values that allow such tar to create a regular file containing
+the extended header records as data. This approach is broken because if the
+extended header is needed because of a long file name, the fields 'name'
+and 'prefix' are unable to contain the full file name. (Some tar
+implementations store the truncated name in the field 'name' alone,
+truncating the name to only 100 bytes instead of 256). Therefore the files
+corresponding to both the extended header and the overridden ustar header
+are extracted using truncated file names, perhaps overwriting existing
+files or directories. It may be a security risk to extract a file with a
+truncated file name.
+
+ To avoid this problem, tarlz writes extended headers with all fields
+zeroed except 'size' (which contains the size of the extended records),
+'chksum', 'typeflag', 'magic', and 'version'. In particular, tarlz sets the
+fields 'name' and 'prefix' to zero. This prevents old tar programs from
+extracting the extended records as a file in the wrong place. Tarlz also
+sets to zero those fields of the ustar header overridden by extended
+records. Finally, tarlz skips members with zeroed 'name' and 'prefix' when
+decoding, except when listing. This is needed to detect certain format
+violations during parallel extraction.
+
+ If an extended header is required for any reason (for example a file
+size of 8 GiB or larger, or a link name longer than 100 bytes), tarlz also
+moves the file name to the extended records to prevent an ustar tool from
+trying to extract the file or link. This also makes it easier, during
+parallel decoding, to detect a tar member split between two lzip members at
+the boundary between the extended header and the ustar header.
+
+
+5.3 As simple as possible (but not simpler)
+===========================================
+
+The tarlz format is mainly ustar. Extended pax headers are used only when
+needed because the length of a file name or link name, or the size or other
+attribute of a file exceeds the limits of the ustar format. Adding 1 KiB of
+extended header and records to each member just to save subsecond
+timestamps seems wasteful for a backup format. Moreover, minimizing the
+overhead may help recover the archive with lziprecover in case of
+corruption.
+
+ Global pax headers are tolerated, but not supported; they are parsed and
+ignored. Some operations may not behave as expected if the archive contains
+global headers.
+
+
+5.4 Improve reproducibility
+===========================
+
+Pax includes by default the process ID of the pax process in the ustar name
+of the extended headers, making the archive not reproducible. Tarlz stores
+the true name of the file just once, either in the ustar header or in the
+extended records, making it easier to produce reproducible archives.
+
+ Pax allows an extended record to have length x-1 or x if x is a power of
+ten; '99<97_bytes>' or '100<97_bytes>'. Tarlz minimizes the length of the
+record and always produces a length of x-1 in these cases.
+
+
+5.5 No data in hard links
+=========================
+
+Tarlz does not allow data in hard link members. The data (if any) must be in
+the member determining the type of the file (which can't be a link). If all
+the names of a file are stored as hard links, the type of the file is lost.
+Not allowing data in hard links also prevents invalid actions like
+extracting file data for a hard link to a symbolic link or to a directory.
+
+
+5.6 Avoid misconversions to/from UTF-8
+======================================
+
+There is no portable way to tell what charset a text string is encoded in.
+Therefore, tarlz stores all fields representing text strings unmodified,
+without conversion to UTF-8 or any other transformation. This prevents
+accidental double UTF-8 conversions. If the need arises this behavior will
+be adjusted with a command-line option in the future.
+
+
+File: tarlz.info, Node: Program design, Next: Multi-threaded decoding, Prev: Amendments to pax format, Up: Top
+
+6 Internal structure of tarlz
+*****************************
+
+The parts of tarlz related to sequential processing of the archive are more
+or less similar to any other tar and won't be described here. The
+interesting parts described here are those related to multi-threaded
+processing.
+
+ The structure of the part of tarlz performing multi-threaded archive
+creation is somewhat similar to that of plzip with the added complication
+of the solidity levels. *Note Program design: (plzip)Program design. A
+grouper thread and several worker threads are created, with the main thread
+acting as muxer (multiplexer) thread. A "packet courier" takes care of data
+transfers among threads and limits the maximum number of data blocks
+(packets) being processed simultaneously.
+
+ The grouper traverses the directory tree, groups together the metadata of
+the files to be archived in each lzip member, and distributes them to the
+workers. The workers compress the metadata received from the grouper along
+with the file data read from the file system. The muxer collects processed
+packets from the workers, and writes them to the archive.
+
+.--------.
+| data|---> to each worker below
+| | .------------.
+| file | ,-->| worker 0 |--,
+| system | | `------------' |
+| | .---------. | .------------. | .-------. .---------.
+|metadata|--->| grouper |-+-->| worker 1 |--+-->| muxer |-->| archive |
+`--------' `---------' | `------------' | `-------' `---------'
+ | ... |
+ | .------------. |
+ `-->| worker N-1 |--'
+ `------------'
+
+ Decoding an archive is somewhat similar to how plzip decompresses a
+regular file to standard output, with the differences that it is not the
+data but only messages that are written to stdout/stderr, and that each
+worker may access files in the file system either to read them (diff) or
+write them (extract). As in plzip, each worker reads members directly from
+the archive.
+
+.--------.
+| file |<---> data to/from each worker below
+| system |
+`--------' .------------.
+ ,-->| worker 0 |--,
+ | `------------' |
+.---------. | .------------. | .-------. .--------.
+| archive |-+-->| worker 1 |--+-->| muxer |-->| stdout |
+`---------' | `------------' | `-------' | stderr |
+ | ... | `--------'
+ | .------------. |
+ `-->| worker N-1 |--'
+ `------------'
+
+ As misaligned tar.lz archives can't be decoded in parallel, and the
+misalignment can't be detected until after decoding has started, a
+"mastership request" mechanism has been designed that allows the decoding to
+continue instead of signalling an error.
+
+ During parallel decoding, if a worker finds a misalignment, it requests
+mastership to decode the rest of the archive. When mastership is requested,
+an error_member_id is set, and all subsequently received packets with
+member_id > error_member_id are rejected. All workers requesting mastership
+are blocked at the request_mastership call until mastership is granted.
+Mastership is granted to the delivering worker when its queue is empty to
+make sure that all preceding packets have been processed. When mastership is
+granted, all packets are deleted and all subsequently received packets not
+coming from the master are rejected.
+
+ If a worker can't continue decoding for any cause (for example lack of
+memory or finding a split tar member at the beginning of a lzip member), it
+requests mastership to print an error and terminate the program. Only if
+some other worker requests mastership in a previous lzip member can this
+error be avoided.
+
+
+File: tarlz.info, Node: Multi-threaded decoding, Next: Minimum archive sizes, Prev: Program design, Up: Top
+
+7 Limitations of parallel tar decoding
+**************************************
+
+Safely decoding an arbitrary tar archive in parallel is only possible if one
+decodes the headers sequentially first. For example, if a tar archive
+containing another tar archive is decoded starting from some position other
+than the beginning, there is no way to know if the first header found there
+belongs to the outer tar archive or to the inner tar archive. Tar is an
+inherently serial format; it was designed for tapes.
+
+ The pax format is even more serial than the ustar format. Two headers
+need to be decoded sequentially for each file. The extended header may even
+need parsing to reveal something as basic as file size. If a thread decodes
+the ustar header skipping the preceding extended header, it may extract a
+file of incorrect size at the wrong place. Moreover, a pax archive with
+global headers can't be decoded in parallel because each thread can't know
+about the global headers decoded by other threads.
+
+ In the case of compressed tar archives, the start of each compressed
+block determines one point through which the tar archive can be decoded in
+parallel. Therefore, in tar.lz archives the decoding operations can't be
+parallelized if the tar members are not aligned with the lzip members. Tar
+archives compressed with plzip can't be decoded in parallel because tar and
+plzip do not have a way to align both sets of members. Certainly one can
+decompress one such archive with a multi-threaded tool like plzip, but the
+increase in speed is not as large as it could be because plzip must
+serialize the decompressed data and pass them to tar, which decodes them
+sequentially, one tar member at a time.
+
+ On the other hand, if the tar.lz archive is created with a tool like
+tarlz, which can guarantee the alignment between tar members and lzip
+members because it controls both archiving and compression, then the lzip
+format becomes an indexed layer on top of the tar archive which makes it
+possible to decode it safely in parallel.
+
+ Tarlz is able to automatically decode aligned and unaligned multimember
+tar.lz archives, keeping backwards compatibility. If tarlz finds a member
+misalignment during multi-threaded decoding, it switches to single-threaded
+mode and continues decoding the archive.
+
+ If the files in the archive are large, multi-threaded '--list' on a
+regular (seekable) tar.lz archive can be hundreds of times faster than
+sequential '--list' because, in addition to using several processors, it
+only needs to decompress part of each lzip member. See the following
+example listing the Silesia corpus on a dual core machine:
+
+ tarlz -9 --no-solid -cf silesia.tar.lz silesia
+ time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
+ time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
+ time tarlz -tf silesia.tar.lz (0.020s)
+
+ On the other hand, multi-threaded '--list' won't detect corruption in
+the tar member data because it only decodes the part of each lzip member
+corresponding to the tar member header. This is another reason why the tar
+headers must provide their own integrity checking.
+
+
+7.1 Limitations of multi-threaded extraction
+============================================
+
+Multi-threaded extraction may produce different output than single-threaded
+extraction in some cases:
+
+ During multi-threaded extraction, several independent threads are
+simultaneously reading the archive and creating files in the file system.
+The archive is not read sequentially. As a consequence, any error or
+weirdness in the archive (like a corrupt member or an end-of-archive block
+in the middle of the archive) usually won't be detected until part of the
+archive beyond that point has been processed.
+
+ If the archive contains two or more tar members with the same name,
+single-threaded extraction extracts the members in the order they appear in
+the archive and leaves in the file system the last version of the file. But
+multi-threaded extraction may extract the members in any order and leave in
+the file system any version of the file nondeterministically. It is
+unspecified which of the tar members is extracted.
+
+ If the same file is extracted through several paths (different member
+names resolve to the same file in the file system), the result is undefined.
+(Probably the resulting file will be mangled).
+
+ Extraction of a hard link may fail if it is extracted before the file it
+links to.
+
+
+File: tarlz.info, Node: Minimum archive sizes, Next: Examples, Prev: Multi-threaded decoding, Up: Top
+
+8 Minimum archive sizes required for multi-threaded block compression
+*********************************************************************
+
+When creating or appending to a compressed archive using multi-threaded
+block compression, tarlz puts tar members together in blocks and compresses
+as many blocks simultaneously as worker threads are chosen, creating a
+multimember compressed archive.
+
+ For this to work as expected (and roughly multiply the compression speed
+by the number of available processors), the uncompressed archive must be at
+least as large as the number of worker threads times the block size (*note
+--data-size::). Else some processors do not get any data to compress, and
+compression is proportionally slower. The maximum speed increase achievable
+on a given archive is limited by the ratio (uncompressed_size / data_size).
+For example, a tarball the size of gcc or linux scales up to 10 or 14
+processors at level -9.
+
+ The following table shows the minimum uncompressed archive size needed
+for full use of N processors at a given compression level, using the default
+data size for each level:
+
+Processors 2 4 8 16 64 256
+------------------------------------------------------------------
+Level
+-0 2 MiB 4 MiB 8 MiB 16 MiB 64 MiB 256 MiB
+-1 4 MiB 8 MiB 16 MiB 32 MiB 128 MiB 512 MiB
+-2 6 MiB 12 MiB 24 MiB 48 MiB 192 MiB 768 MiB
+-3 8 MiB 16 MiB 32 MiB 64 MiB 256 MiB 1 GiB
+-4 12 MiB 24 MiB 48 MiB 96 MiB 384 MiB 1.5 GiB
+-5 16 MiB 32 MiB 64 MiB 128 MiB 512 MiB 2 GiB
+-6 32 MiB 64 MiB 128 MiB 256 MiB 1 GiB 4 GiB
+-7 64 MiB 128 MiB 256 MiB 512 MiB 2 GiB 8 GiB
+-8 96 MiB 192 MiB 384 MiB 768 MiB 3 GiB 12 GiB
+-9 128 MiB 256 MiB 512 MiB 1 GiB 4 GiB 16 GiB
+
+
+File: tarlz.info, Node: Examples, Next: Problems, Prev: Minimum archive sizes, Up: Top
+
+9 A small tutorial with examples
+********************************
+
+Example 1: Create a multimember compressed archive 'archive.tar.lz'
+containing files 'a', 'b' and 'c'.
+
+ tarlz -cf archive.tar.lz a b c
+
+
+Example 2: Append files 'd' and 'e' to the multimember compressed archive
+'archive.tar.lz'.
+
+ tarlz -rf archive.tar.lz d e
+
+
+Example 3: Create a solidly compressed appendable archive 'archive.tar.lz'
+containing files 'a', 'b' and 'c'. Then append files 'd' and 'e' to the
+archive.
+
+ tarlz --asolid -cf archive.tar.lz a b c
+ tarlz --asolid -rf archive.tar.lz d e
+
+
+Example 4: Create a compressed appendable archive containing directories
+'dir1', 'dir2' and 'dir3' with a separate lzip member per directory. Then
+append files 'a', 'b', 'c', 'd' and 'e' to the archive, all of them
+contained in a single lzip member. The resulting archive 'archive.tar.lz'
+contains 5 lzip members (including the end-of-archive member).
+
+ tarlz --dsolid -cf archive.tar.lz dir1 dir2 dir3
+ tarlz --asolid -rf archive.tar.lz a b c d e
+
+
+Example 5: Create a solidly compressed archive 'archive.tar.lz' containing
+files 'a', 'b' and 'c'. Note that no more files can be later appended to
+the archive.
+
+ tarlz --solid -cf archive.tar.lz a b c
+
+
+Example 6: Extract all files from archive 'archive.tar.lz'.
+
+ tarlz -xf archive.tar.lz
+
+
+Example 7: Extract files 'a' and 'c', and the whole tree under directory
+'dir1' from archive 'archive.tar.lz'.
+
+ tarlz -xf archive.tar.lz a c dir1
+
+
+Example 8: Copy the contents of directory 'sourcedir' to the directory
+'destdir'.
+
+ tarlz -C sourcedir --uncompressed -cf - . | tarlz -C destdir -xf -
+
+
+Example 9: Compress the existing POSIX archive 'archive.tar' and write the
+output to 'archive.tar.lz'. Compress each member individually for maximum
+availability. (If one member in the compressed archive gets damaged, the
+other members can still be extracted).
+
+ tarlz -z --no-solid archive.tar
+
+
+Example 10: Compress the archive 'archive.tar' and write the output to
+'foo.tar.lz'.
+
+ tarlz -z -o foo.tar.lz archive.tar
+
+
+Example 11: Concatenate and compress two archives 'archive1.tar' and
+'archive2.tar', and write the output to 'foo.tar.lz'.
+
+ tarlz -A archive1.tar archive2.tar | tarlz -z -o foo.tar.lz
+
+
+File: tarlz.info, Node: Problems, Next: Concept index, Prev: Examples, Up: Top
+
+10 Reporting bugs
+*****************
+
+There are probably bugs in tarlz. There are certainly errors and omissions
+in this manual. If you report them, they will get fixed. If you don't, no
+one will ever know about them and they will remain unfixed for all
+eternity, if not longer.
+
+ If you find a bug in tarlz, please send electronic mail to
+<lzip-bug@nongnu.org>. Include the version number, which you can find by
+running 'tarlz --version' and 'tarlz -v --check-lib'.
+
+
+File: tarlz.info, Node: Concept index, Prev: Problems, Up: Top
+
+Concept index
+*************
+
+
+* Menu:
+
+* Amendments to pax format: Amendments to pax format. (line 6)
+* bugs: Problems. (line 6)
+* examples: Examples. (line 6)
+* file format: File format. (line 6)
+* getting help: Problems. (line 6)
+* introduction: Introduction. (line 6)
+* invoking: Invoking tarlz. (line 6)
+* minimum archive sizes: Minimum archive sizes. (line 6)
+* options: Invoking tarlz. (line 6)
+* parallel tar decoding: Multi-threaded decoding. (line 6)
+* portable character set: Portable character set. (line 6)
+* program design: Program design. (line 6)
+* usage: Invoking tarlz. (line 6)
+* version: Invoking tarlz. (line 6)
+
+
+
+Tag Table:
+Node: Top216
+Node: Introduction1207
+Node: Invoking tarlz4032
+Ref: --data-size13076
+Ref: --bsolid17512
+Node: Portable character set23425
+Node: File format24068
+Ref: key_crc3231050
+Ref: ustar-uid-gid34315
+Ref: ustar-mtime35122
+Node: Amendments to pax format37125
+Ref: crc3237834
+Ref: flawed-compat39146
+Node: Program design43228
+Node: Multi-threaded decoding47153
+Ref: mt-extraction50434
+Node: Minimum archive sizes51740
+Node: Examples53867
+Node: Problems56234
+Node: Concept index56789
+
+End Tag Table
+
+
+Local Variables:
+coding: iso-8859-15
+End:
diff --git a/doc/tarlz.texi b/doc/tarlz.texi
new file mode 100644
index 0000000..f37164f
--- /dev/null
+++ b/doc/tarlz.texi
@@ -0,0 +1,1356 @@
+\input texinfo @c -*-texinfo-*-
+@c %**start of header
+@setfilename tarlz.info
+@documentencoding ISO-8859-15
+@settitle Tarlz Manual
+@finalout
+@c %**end of header
+
+@set UPDATED 3 January 2024
+@set VERSION 0.25
+
+@dircategory Archiving
+@direntry
+* Tarlz: (tarlz). Archiver with multimember lzip compression
+@end direntry
+
+
+@ifnothtml
+@titlepage
+@title Tarlz
+@subtitle Archiver with multimember lzip compression
+@subtitle for Tarlz version @value{VERSION}, @value{UPDATED}
+@author by Antonio Diaz Diaz
+
+@page
+@vskip 0pt plus 1filll
+@end titlepage
+
+@contents
+@end ifnothtml
+
+@ifnottex
+@node Top
+@top
+
+This manual is for Tarlz (version @value{VERSION}, @value{UPDATED}).
+
+@menu
+* Introduction:: Purpose and features of tarlz
+* Invoking tarlz:: Command-line interface
+* Portable character set:: POSIX portable filename character set
+* File format:: Detailed format of the compressed archive
+* Amendments to pax format:: The reasons for the differences with pax
+* Program design:: Internal structure of tarlz
+* Multi-threaded decoding:: Limitations of parallel tar decoding
+* Minimum archive sizes:: Sizes required for full multi-threaded speed
+* Examples:: A small tutorial with examples
+* Problems:: Reporting bugs
+* Concept index:: Index of concepts
+@end menu
+
+@sp 1
+Copyright @copyright{} 2013-2024 Antonio Diaz Diaz.
+
+This manual is free documentation: you have unlimited permission to copy,
+distribute, and modify it.
+@end ifnottex
+
+
+@node Introduction
+@chapter Introduction
+@cindex introduction
+
+@uref{http://www.nongnu.org/lzip/tarlz.html,,Tarlz} is a massively parallel
+(multi-threaded) combined implementation of the tar archiver and the
+@uref{http://www.nongnu.org/lzip/lzip.html,,lzip} compressor. Tarlz uses the
+compression library @uref{http://www.nongnu.org/lzip/lzlib.html,,lzlib}.
+
+Tarlz creates tar archives using a simplified and safer variant of the POSIX
+pax format compressed in lzip format, keeping the alignment between tar
+members and lzip members. The resulting multimember tar.lz archive is
+backward compatible with standard tar tools like GNU tar, which treat it
+like any other tar.lz archive. Tarlz can append files to the end of such
+compressed archives.
+
+Keeping the alignment between tar members and lzip members has two
+advantages. It adds an indexed lzip layer on top of the tar archive, making
+it possible to decode the archive safely in parallel. It also minimizes the
+amount of data lost in case of corruption. Compressing a tar archive with
+plzip may even double the number of files lost for each lzip member damaged
+because it does not keep the members aligned.
+
+Tarlz can create tar archives with five levels of compression granularity:
+per file (@option{--no-solid}), per block (@option{--bsolid}, default), per
+directory (@option{--dsolid}), appendable solid (@option{--asolid}), and
+solid (@option{--solid}). It can also create uncompressed tar archives.
+
+@noindent
+Of course, compressing each file (or each directory) individually can't
+achieve a compression ratio as high as compressing solidly the whole tar
+archive, but it has the following advantages:
+
+@itemize @bullet
+@item
+The resulting multimember tar.lz archive can be decompressed in
+parallel, multiplying the decompression speed.
+
+@item
+New members can be appended to the archive (by removing the
+end-of-archive member), and unwanted members can be deleted from the
+archive. Just like an uncompressed tar archive.
+
+@item
+It is a safe POSIX-style backup format. In case of corruption, tarlz
+can extract all the undamaged members from the tar.lz archive,
+skipping over the damaged members, just like the standard
+(uncompressed) tar. Moreover, the option @option{--keep-damaged} can be used
+to recover as much data as possible from each damaged member, and
+lziprecover can be used to recover some of the damaged members.
+
+@item
+A multimember tar.lz archive is usually smaller than the corresponding
+solidly compressed tar.gz archive, except when individually
+compressing files smaller than about @w{32 KiB}.
+@end itemize
+
+Tarlz protects the extended records with a Cyclic Redundancy Check (CRC) in
+a way compatible with standard tar tools. @xref{crc32}.
+
+Tarlz does not understand other tar formats like @samp{gnu}, @samp{oldgnu},
+@samp{star}, or @samp{v7}. The command
+@w{@samp{tarlz -t -f archive.tar.lz > /dev/null}} can be used to check that
+the format of the archive is compatible with tarlz.
+
+
+@node Invoking tarlz
+@chapter Invoking tarlz
+@cindex invoking
+@cindex options
+@cindex usage
+@cindex version
+
+The format for running tarlz is:
+
+@example
+tarlz @var{operation} [@var{options}] [@var{files}]
+@end example
+
+@noindent
+All operations except @option{--concatenate} and @option{--compress} operate
+on whole trees if any @var{file} is a directory. All operations except
+@option{--compress} overwrite output files without warning. If no archive is
+specified, tarlz tries to read it from standard input or write it to
+standard output. Tarlz refuses to read archive data from a terminal or write
+archive data to a terminal. Tarlz detects when the archive being created or
+enlarged is among the files to be archived, appended, or concatenated, and
+skips it.
+
+Tarlz does not use absolute file names nor file names above the current
+working directory (perhaps changed by option @option{-C}). On archive creation
+or appending, tarlz archives the files specified, but removes from member
+names any leading and trailing slashes and any file name prefixes containing
+a @samp{..} component. On extraction, leading and trailing slashes are also
+removed from member names, and archive members containing a @samp{..}
+component in the file name are skipped. Tarlz does not follow symbolic links
+during extraction; not even symbolic links replacing intermediate
+directories.
+
+On extraction and listing, tarlz removes leading @samp{./} strings from
+member names in the archive or given in the command line, so that
+@w{@samp{tarlz -xf foo ./bar baz}} extracts members @samp{bar} and
+@samp{./baz} from archive @samp{foo}.
+
+If several compression levels or @option{--*solid} options are given, the last
+setting is used. For example @w{@option{-9 --solid --uncompressed -1}} is
+equivalent to @w{@option{-1 --solid}}.
+
+Tarlz supports the following operations:
+
+@table @code
+@item --help
+Print an informative help message describing the options and exit.
+
+@item -V
+@itemx --version
+Print the version number of tarlz on the standard output and exit.
+This version number should be included in all bug reports.
+
+@item -A
+@itemx --concatenate
+Append one or more archives to the end of an archive. If no archive is
+specified with the option @option{-f}, concatenate the input archives to
+standard output. All the archives involved must be regular (seekable) files,
+and must be either all compressed or all uncompressed. Compressed and
+uncompressed archives can't be mixed. Compressed archives must be
+multimember lzip files with the two end-of-archive blocks plus any zero
+padding contained in the last lzip member of each archive. The intermediate
+end-of-archive blocks are removed as each new archive is concatenated. If
+the archive is uncompressed, tarlz parses tar headers until it finds the
+end-of-archive blocks. Exit with status 0 without modifying the archive if
+no @var{files} have been specified.
+
+Concatenating archives containing files in common results in two or more tar
+members with the same name in the resulting archive, which may produce
+nondeterministic behavior during multi-threaded extraction.
+@xref{mt-extraction}.
+
+@item -c
+@itemx --create
+Create a new archive from @var{files}.
+
+@item -d
+@itemx --diff
+Compare and report differences between archive and file system. For each tar
+member in the archive, check that the corresponding file in the file system
+exists and is of the same type (regular file, directory, etc). Report on
+standard output the differences found in type, mode (permissions), owner and
+group IDs, modification time, file size, file contents (of regular files),
+target (of symlinks) and device number (of block/character special files).
+
+As tarlz removes leading slashes from member names, the option @option{-C} may
+be used in combination with @option{--diff} when absolute file names were used
+on archive creation: @w{@samp{tarlz -C / -d}}. Alternatively, tarlz may be
+run from the root directory to perform the comparison.
+
+@item --delete
+Delete files and directories from an archive in place. It currently can
+delete only from uncompressed archives and from archives with files
+compressed individually (@option{--no-solid} archives). Note that files of
+about @option{--data-size} or larger are compressed individually even if
+@option{--bsolid} is used, and can therefore be deleted. Tarlz takes care to
+not delete a tar member unless it is possible to do so. For example it won't
+try to delete a tar member that is not compressed individually. Even in the
+case of finding a corrupt member after having deleted some member(s), tarlz
+stops and copies the rest of the file as soon as corruption is found,
+leaving it just as corrupt as it was, but not worse.
+
+To delete a directory without deleting the files under it, use
+@w{@samp{tarlz --delete -f foo --exclude='dir/*' dir}}. Deleting in place
+may be dangerous. A corrupt archive, a power cut, or an I/O error may cause
+data loss.
+
+@item -r
+@itemx --append
+Append files to the end of an archive. The archive must be a regular
+(seekable) file either compressed or uncompressed. Compressed members can't
+be appended to an uncompressed archive, nor vice versa. If the archive is
+compressed, it must be a multimember lzip file with the two end-of-archive
+blocks plus any zero padding contained in the last lzip member of the
+archive. It is possible to append files to an archive with a different
+compression granularity. Appending works as follows: first the
+end-of-archive blocks are removed, then the new members are appended, and
+finally two new end-of-archive blocks are appended to the archive. If the
+archive is uncompressed, tarlz parses and skips tar headers until it finds
+the end-of-archive blocks. Exit with status 0 without modifying the archive
+if no @var{files} have been specified.
+
+Appending files already present in the archive results in two or more tar
+members with the same name, which may produce nondeterministic behavior
+during multi-threaded extraction. @xref{mt-extraction}.
+
+@item -t
+@itemx --list
+List the contents of an archive. If @var{files} are given, list only the
+@var{files} given.
+
+@item -x
+@itemx --extract
+Extract files from an archive. If @var{files} are given, extract only the
+@var{files} given. Else extract all the files in the archive. To extract a
+directory without extracting the files under it, use
+@w{@samp{tarlz -xf foo --exclude='dir/*' dir}}. Tarlz removes files and
+empty directories unconditionally before extracting over them. Other than
+that, it does not make any special effort to extract a file over an
+incompatible type of file. For example, extracting a file over a non-empty
+directory usually fails.
+
+@item -z
+@itemx --compress
+Compress existing POSIX tar archives aligning the lzip members to the tar
+members with choice of granularity (@option{--bsolid} by default,
+@option{--dsolid} works like @option{--asolid}). Exit with error status 2 if
+any input archive is an empty file. The input archives are kept unchanged.
+Existing compressed archives are not overwritten. A hyphen @samp{-} used as
+the name of an input archive reads from standard input and writes to
+standard output (unless the option @option{--output} is used). Tarlz can be
+used as compressor for GNU tar by using a command like
+@w{@samp{tar -c -Hustar foo | tarlz -z -o foo.tar.lz}}. Tarlz can be used as
+compressor for zupdate (zutils) by using a command like
+@w{@samp{zupdate --lz="tarlz -z" foo.tar.gz}}. Note that tarlz only works
+reliably on archives without global headers, or with global headers whose
+content can be ignored.
+
+The compression is reversible, including any garbage present after the
+end-of-archive blocks. Tarlz stops parsing after the first end-of-archive
+block is found, and then compresses the rest of the archive. Unless solid
+compression is requested, the end-of-archive blocks are compressed in a lzip
+member separated from the preceding members and from any non-zero garbage
+following the end-of-archive blocks. @option{--compress} implies plzip
+argument style, not tar style. Each input archive is compressed to a file
+with the extension @samp{.lz} added unless the option @option{--output} is
+used. When @option{--output} is used, only one input archive can be specified.
+@option{-f} can't be used with @option{--compress}.
+
+@item --check-lib
+Compare the
+@uref{http://www.nongnu.org/lzip/manual/lzlib_manual.html#Library-version,,version of lzlib}
+used to compile tarlz with the version actually being used at run time and
+exit. Report any differences found. Exit with error status 1 if differences
+are found. A mismatch may indicate that lzlib is not correctly installed or
+that a different version of lzlib has been installed after compiling tarlz.
+Exit with error status 2 if LZ_API_VERSION and LZ_version_string don't
+match. @w{@samp{tarlz -v --check-lib}} shows the version of lzlib being used
+and the value of LZ_API_VERSION (if defined).
+@ifnothtml
+@xref{Library version,,,lzlib}.
+@end ifnothtml
+
+@end table
+
+Tarlz supports the following
+@uref{http://www.nongnu.org/arg-parser/manual/arg_parser_manual.html#Argument-syntax,,options}:
+@ifnothtml
+@xref{Argument syntax,,,arg_parser}.
+@end ifnothtml
+
+@table @code
+@anchor{--data-size}
+@item -B @var{bytes}
+@itemx --data-size=@var{bytes}
+Set target size of input data blocks for the option @option{--bsolid}.
+@xref{--bsolid}. Valid values range from @w{8 KiB} to @w{1 GiB}. Default
+value is two times the dictionary size, except for option @option{-0} where it
+defaults to @w{1 MiB}. @xref{Minimum archive sizes}.
+
+@item -C @var{dir}
+@itemx --directory=@var{dir}
+Change to directory @var{dir}. When creating, appending, comparing, or
+extracting, the position of each @option{-C} option in the command line is
+significant; it changes the current working directory for the following
+@var{files} until a new @option{-C} option appears in the command line.
+@option{--list} and @option{--delete} ignore any @option{-C} options
+specified. @var{dir} is relative to the then current working directory,
+perhaps changed by a previous @option{-C} option.
+
+Note that a process can only have one current working directory (CWD).
+Therefore multi-threading can't be used to create or decode an archive if a
+@option{-C} option appears after a (relative) file name in the command line.
+(All file names are made relative when decoding).
+
+@item -f @var{archive}
+@itemx --file=@var{archive}
+Use archive file @var{archive}. A hyphen @samp{-} used as an @var{archive}
+argument reads from standard input or writes to standard output.
+
+@item -h
+@itemx --dereference
+Follow symbolic links during archive creation, appending or comparison.
+Archive or compare the files they point to instead of the links themselves.
+
+@item -n @var{n}
+@itemx --threads=@var{n}
+Set the number of (de)compression threads, overriding the system's default.
+Valid values range from 0 to "as many as your system can support". A value
+of 0 disables threads entirely. If this option is not used, tarlz tries to
+detect the number of processors in the system and use it as default value.
+@w{@samp{tarlz --help}} shows the system's default value. See the note about
+multi-threading in the option @option{-C} above.
+
+Note that the number of usable threads is limited during compression to
+@w{ceil( uncompressed_size / data_size )} (@pxref{Minimum archive sizes}),
+and during decompression to the number of lzip members in the tar.lz
+archive, which you can find by running @w{@samp{lzip -lv archive.tar.lz}}.
+
+@item -o @var{file}
+@itemx --output=@var{file}
+Write the compressed output to @var{file}. @w{@option{-o -}} writes the
+compressed output to standard output. Currently @option{--output} only works
+with @option{--compress}.
+
+@item -p
+@itemx --preserve-permissions
+On extraction, set file permissions as they appear in the archive. This is
+the default behavior when tarlz is run by the superuser. The default for
+other users is to subtract the umask of the user running tarlz from the
+permissions specified in the archive.
+
+@item -q
+@itemx --quiet
+Quiet operation. Suppress all messages.
+
+@item -v
+@itemx --verbose
+Verbosely list files processed. Further -v's (up to 4) increase the
+verbosity level.
+
+@item -0 .. -9
+Set the compression level for @option{--create}, @option{--append}, and
+@option{--compress}. The default compression level is @option{-6}. Like lzip,
+tarlz also minimizes the dictionary size of the lzip members it creates,
+reducing the amount of memory required for decompression.
+
+@multitable {Level} {Dictionary size} {Match length limit}
+@item Level @tab Dictionary size @tab Match length limit
+@item -0 @tab 64 KiB @tab 16 bytes
+@item -1 @tab 1 MiB @tab 5 bytes
+@item -2 @tab 1.5 MiB @tab 6 bytes
+@item -3 @tab 2 MiB @tab 8 bytes
+@item -4 @tab 3 MiB @tab 12 bytes
+@item -5 @tab 4 MiB @tab 20 bytes
+@item -6 @tab 8 MiB @tab 36 bytes
+@item -7 @tab 16 MiB @tab 68 bytes
+@item -8 @tab 24 MiB @tab 132 bytes
+@item -9 @tab 32 MiB @tab 273 bytes
+@end multitable
+
+@item --uncompressed
+With @option{--create}, don't compress the tar archive created. Create an
+uncompressed tar archive instead. With @option{--append}, don't compress the
+new members appended to the tar archive. Compressed members can't be
+appended to an uncompressed archive, nor vice versa. @option{--uncompressed}
+can be omitted if it can be deduced from the archive name. (An uncompressed
+archive name lacks a @samp{.lz} or @samp{.tlz} extension).
+
+@item --asolid
+When creating or appending to a compressed archive, use appendable solid
+compression. All the files being added to the archive are compressed into a
+single lzip member, but the end-of-archive blocks are compressed into a
+separate lzip member. This creates a solidly compressed appendable archive.
+Solid archives can't be created nor decoded in parallel.
+
+@anchor{--bsolid}
+@item --bsolid
+When creating or appending to a compressed archive, use block compression.
+Tar members are compressed together in a lzip member until they approximate
+a target uncompressed size. The size can't be exact because each solidly
+compressed data block must contain an integer number of tar members. Block
+compression is the default because it improves compression ratio for
+archives with many files smaller than the block size. This option allows
+tarlz to revert to default behavior if, for example, it is invoked through an
+alias like @w{@samp{tar='tarlz --solid'}}. @xref{--data-size}, to set the
+target block size.
+
+@item --dsolid
+When creating or appending to a compressed archive, compress each file
+specified in the command line separately in its own lzip member, and use
+solid compression for each directory specified in the command line. The
+end-of-archive blocks are compressed into a separate lzip member. This
+creates a compressed appendable archive with a separate lzip member for each
+file or top-level directory specified.
+
+@item --no-solid
+When creating or appending to a compressed archive, compress each file
+separately in its own lzip member. The end-of-archive blocks are compressed
+into a separate lzip member. This creates a compressed appendable archive
+with a lzip member for each file.
+
+@item --solid
+When creating or appending to a compressed archive, use solid compression.
+The files being added to the archive, along with the end-of-archive blocks,
+are compressed into a single lzip member. The resulting archive is not
+appendable. No more files can be later appended to the archive. Solid
+archives can't be created nor decoded in parallel.
+
+@item --anonymous
+Equivalent to @w{@option{--owner=root --group=root}}.
+
+@item --owner=@var{owner}
+When creating or appending, use @var{owner} for files added to the archive.
+If @var{owner} is not a valid user name, it is decoded as a decimal numeric
+user ID.
+
+@item --group=@var{group}
+When creating or appending, use @var{group} for files added to the archive.
+If @var{group} is not a valid group name, it is decoded as a decimal numeric
+group ID.
+
+@item --exclude=@var{pattern}
+Exclude files matching a shell pattern like @samp{*.o}. A file is considered
+to match if any component of the file name matches. For example, @samp{*.o}
+matches @samp{foo.o}, @samp{foo.o/bar} and @samp{foo/bar.o}. If
+@var{pattern} contains a @samp{/}, it matches a corresponding @samp{/} in
+the file name. For example, @samp{foo/*.o} matches @samp{foo/bar.o}.
+Multiple @option{--exclude} options can be specified.
+
+@item --ignore-ids
+Make @option{--diff} ignore differences in owner and group IDs. This option is
+useful when comparing an @option{--anonymous} archive.
+
+@item --ignore-metadata
+Make @option{--diff} ignore any differences in metadata (file permissions,
+owner and group IDs, modification time). Compare only file type, file size,
+and file content. This option is useful when file permissions have not been
+fully restored because uid/gid changed on extraction.
+
+@item --ignore-overflow
+Make @option{--diff} ignore differences in mtime caused by overflow on 32-bit
+systems with a 32-bit time_t.
+
+@item --keep-damaged
+Don't delete partially extracted files. If a decompression error happens
+while extracting a file, keep the partial data extracted. Use this option to
+recover as much data as possible from each damaged member. It is recommended
+to run tarlz in single-threaded mode (@option{--threads=0}) when using this
+option.
+
+@item --missing-crc
+Exit with error status 2 if the CRC of the extended records is missing. When
+this option is used, tarlz detects any corruption in the extended records
+(only limited by CRC collisions). But note that a corrupt @samp{GNU.crc32}
+keyword, for example @samp{GNU.crc30}, is reported as a missing CRC instead
+of as a corrupt record. This misleading @w{@samp{Missing CRC}} message is
+the consequence of a flaw in the POSIX pax format; i.e., the lack of a
+mandatory check sequence of the extended records. @xref{crc32}.
+
+@item --mtime=@var{date}
+When creating or appending, use @var{date} as the modification time for
+files added to the archive instead of their actual modification times. The
+value of @var{date} may be either @samp{@@} followed by the number of
+seconds since (or before) the epoch, or a date in format
+@w{@samp{[-]YYYY-MM-DD HH:MM:SS}} or @samp{[-]YYYY-MM-DDTHH:MM:SS}, or the
+name of an existing reference file starting with @samp{.} or @samp{/} whose
+modification time is used. The time of day @samp{HH:MM:SS} in the date
+format is optional and defaults to @samp{00:00:00}. The epoch is
+@w{@samp{1970-01-01 00:00:00 UTC}}. Negative seconds or years define a
+modification time before the epoch.
+
+@item --out-slots=@var{n}
+Number of @w{1 MiB} output packets buffered per worker thread during
+multi-threaded creation or appending to compressed archives. Increasing the
+number of packets may increase compression speed if the files being archived
+are larger than @w{64 MiB} compressed, but requires more memory. Valid
+values range from 1 to 1024. The default value is 64.
+
+@item --warn-newer
+During archive creation, warn if any file being archived has a modification
+time newer than the archive creation time. This option may slow archive
+creation somewhat because it makes an extra call to @samp{stat} after
+archiving each file, but it guarantees that file contents were not modified
+during the creation of the archive. Note that the file must be at least one
+second newer than the archive for it to be detected as newer.
+
+@ignore
+@item --permissive
+Allow some violations of the archive format, like consecutive extended
+headers preceding a ustar header, or several records with the same
+keyword appearing in the same block of extended records.
+@end ignore
+
+@end table
+
+Exit status: 0 for a normal exit, 1 for environmental problems
+(file not found, files differ, invalid command-line options, I/O errors,
+etc), 2 to indicate a corrupt or invalid input file, 3 for an internal
+consistency error (e.g., bug) which caused tarlz to panic.
+
+
+@node Portable character set
+@chapter POSIX portable filename character set
+@cindex portable character set
+
+The set of characters from which portable file names are constructed.
+
+@example
+A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+a b c d e f g h i j k l m n o p q r s t u v w x y z
+0 1 2 3 4 5 6 7 8 9 . _ -
+@end example
+
+The last three characters are the period, underscore, and hyphen-minus
+characters, respectively.
+
+File names are identifiers. Therefore, archiving works better when file
+names use only the portable character set without spaces added.
+
+
+@node File format
+@chapter File format
+@cindex file format
+
+In the diagram below, a box like this:
+
+@verbatim
++---+
+| | <-- the vertical bars might be missing
++---+
+@end verbatim
+
+represents one byte; a box like this:
+
+@verbatim
++==============+
+| |
++==============+
+@end verbatim
+
+represents a variable number of bytes or a fixed but large number of
+bytes (for example 512).
+
+@sp 1
+A tar.lz file consists of one or more lzip members (compressed data sets).
+The members simply appear one after another in the file, with no additional
+information before, between, or after them.
+
+Each lzip member contains one or more tar members in a simplified POSIX pax
+interchange format. The only pax typeflag value supported by tarlz (in
+addition to the typeflag values defined by the ustar format) is @samp{x}.
+The pax format is an extension on top of the ustar format that removes the
+size limitations of the ustar format.
+
+Each tar member contains one file archived, and is represented by the
+following sequence:
+
+@itemize @bullet
+@item
+An optional extended header block followed by one or more blocks that
+contain the extended header records as if they were the contents of a file;
+i.e., the extended header records are included as the data for this header
+block. This header block is of the form described in pax header block, with
+a typeflag value of @samp{x}.
+
+@item
+A header block in ustar format that describes the file. Any fields defined
+in the preceding optional extended header records override the associated
+fields in this header block for this file.
+
+@item
+Zero or more blocks that contain the contents of the file.
+@end itemize
+
+Each tar member must be contiguously stored in a lzip member for the
+parallel decoding operations like @option{--list} to work. If any tar member
+is split over two or more lzip members, the archive must be decoded
+sequentially. @xref{Multi-threaded decoding}.
+
+At the end of the archive file there are two 512-byte blocks filled with
+binary zeros, interpreted as an end-of-archive indicator. These EOA blocks
+are either compressed in a separate lzip member or compressed along with the
+tar members contained in the last lzip member. For a compressed archive to
+be recognized by tarlz as appendable, the last lzip member must contain
+between 512 and 32256 zeros alone (without any non-zero bytes).
+
+The diagram below shows the correspondence between each tar member (formed
+by one or two headers plus optional data) in the tar archive and each
+@uref{http://www.nongnu.org/lzip/manual/lzip_manual.html#File-format,,lzip member}
+in the resulting multimember tar.lz archive, when per file compression is
+used:
+@ifnothtml
+@xref{File format,,,lzip}.
+@end ifnothtml
+
+@verbatim
+tar
++========+======+=================+===============+========+======+========+
+| header | data | extended header | extended data | header | data | EOA |
++========+======+=================+===============+========+======+========+
+
+tar.lz
++===============+=================================================+========+
+| member | member | member |
++===============+=================================================+========+
+@end verbatim
+
+@ignore
+When @option{--permissive} is used, the following violations of the
+archive format are allowed:@*
+If several extended headers precede an ustar header, only the last
+extended header takes effect. The other extended headers are ignored.
+Similarly, if several records with the same keyword appear in the same
+block of extended records, only the last record for the repeated keyword
+takes effect. The other records for the repeated keyword are ignored.@*
+A global header inserted between an extended header and an ustar header.@*
+An extended header just before the end-of-archive blocks.
+@end ignore
+
+@sp 1
+@section Pax header block
+
+The pax header block is identical to the ustar header block described below
+except that the typeflag has the value @samp{x} (extended). The field
+@samp{size} is the size of the extended header data in bytes. Most other
+fields in the pax header block are zeroed on archive creation to prevent
+trouble if the archive is read by an ustar tool, and are ignored by tarlz on
+archive extraction. @xref{flawed-compat}.
+
+Tarlz limits the size of the pax extended header data so that the whole
+header set (extended header + extended data + ustar header) can be read and
+decoded in a buffer of size INT_MAX.
+
+The pax extended header data consists of one or more records, each of
+them constructed as follows:@*
+@w{@samp{"%d %s=%s\n", <length>, <keyword>, <value>}}
+
+The fields <length> and <keyword> in the record must be limited to the
+portable character set (@pxref{Portable character set}). The field <length>
+contains the decimal length of the record in bytes, including the trailing
+newline. The field <value> is stored as-is, without conversion to UTF-8 nor
+any other transformation. The fields are separated by the ASCII characters
+space, equal-sign, and newline.
+
+These are the <keyword> values currently supported by tarlz:
+
+@table @code
+@item atime
+The signed decimal representation of the access time of the following file
+in seconds since (or before) the epoch, obtained from the function
+@samp{stat}. The atime record is created only for files with a modification
+time outside of the ustar range. @xref{ustar-mtime}.
+
+@item gid
+The unsigned decimal representation of the group ID of the group that owns
+the following file. The gid record is created only for files with a group ID
+greater than 2_097_151 @w{(octal 7_777_777)}. @xref{ustar-uid-gid}.
+
+@item linkpath
+The file name of a link being created to another file, of any type,
+previously archived. This record overrides the field @samp{linkname} in the
+following ustar header block. The following ustar header block determines
+the type of link created. If typeflag of the following header block is 1, a
+hard link is created. If typeflag is 2, a symbolic link is created and the
+linkpath value is used as the contents of the symbolic link. The linkpath
+record is created only for links with a link name that does not fit in the
+space provided by the ustar header.
+
+@item mtime
+The signed decimal representation of the modification time of the following
+file in seconds since (or before) the epoch, obtained from the function
+@samp{stat}. This record overrides the field @samp{mtime} in the following
+ustar header block. The mtime record is created only for files with a
+modification time outside of the ustar range. @xref{ustar-mtime}.
+
+@item path
+The file name of the following file. This record overrides the fields
+@samp{name} and @samp{prefix} in the following ustar header block. The path
+record is created for files with a name that does not fit in the space
+provided by the ustar header, but is also created for files that require any
+other extended record so that the fields @samp{name} and @samp{prefix} in
+the following ustar header block can be zeroed.
+
+@item size
+The size of the file in bytes, expressed as a decimal number using digits
+from the ISO/IEC 646:1991 (ASCII) standard. This record overrides the field
+@samp{size} in the following ustar header block. The size record is created
+only for files with a size value greater than 8_589_934_591
+@w{(octal 77_777_777_777)}; that is, @w{8 GiB} (2^33 bytes) or larger.
+
+@item uid
+The unsigned decimal representation of the user ID of the file owner of the
+following file. The uid record is created only for files with a user ID
+greater than 2_097_151 @w{(octal 7_777_777)}. @xref{ustar-uid-gid}.
+
+@anchor{key_crc32}
+@item GNU.crc32
+CRC32-C (Castagnoli) of the extended header data excluding the 8 bytes
+representing the CRC <value> itself. The <value> is represented as 8
+hexadecimal digits in big endian order,
+@w{@samp{22 GNU.crc32=00000000\n}}. The keyword of the CRC record is
+protected by the CRC to guarantee that corruption is always detected when
+using @option{--missing-crc} (except in case of CRC collision). A CRC was
+chosen because a checksum is too weak for a potentially large list of
+variable sized records. A checksum can't detect simple errors like the
+swapping of two bytes.
+
+@end table
+
+At verbosity level 1 or higher tarlz prints a diagnostic for each unknown
+extended header keyword found in an archive, once per keyword.
+
+@sp 1
+@section Ustar header block
+
+The ustar header block has a length of 512 bytes and is structured as
+shown in the following table. All lengths and offsets are in decimal.
+
+@multitable {Field Name} {Offset} {Length (in bytes)}
+@item Field Name @tab Offset @tab Length (in bytes)
+@item name @tab 0 @tab 100
+@item mode @tab 100 @tab 8
+@item uid @tab 108 @tab 8
+@item gid @tab 116 @tab 8
+@item size @tab 124 @tab 12
+@item mtime @tab 136 @tab 12
+@item chksum @tab 148 @tab 8
+@item typeflag @tab 156 @tab 1
+@item linkname @tab 157 @tab 100
+@item magic @tab 257 @tab 6
+@item version @tab 263 @tab 2
+@item uname @tab 265 @tab 32
+@item gname @tab 297 @tab 32
+@item devmajor @tab 329 @tab 8
+@item devminor @tab 337 @tab 8
+@item prefix @tab 345 @tab 155
+@end multitable
+
+All characters in the header block are coded using the ISO/IEC 646:1991
+(ASCII) standard, except in fields storing names for files, users, and
+groups. For maximum portability between implementations, names should only
+contain characters from the portable character set (@pxref{Portable
+character set}), but if an implementation supports the use of characters
+outside of @samp{/} and the portable character set in names for files,
+users, and groups, tarlz will use the byte values in these names unmodified.
+
+The fields @samp{name}, @samp{linkname}, and @samp{prefix} are
+null-terminated character strings except when all characters in the array
+contain non-null characters including the last character.
+
+The fields @samp{name} and @samp{prefix} produce the file name. A new file
+name is formed, if prefix is not an empty string (its first character is not
+null), by concatenating prefix (up to the first null character), a slash
+character, and name; otherwise, name is used alone. In either case, name is
+terminated at the first null character. If prefix begins with a null
+character, it is ignored. In this manner, file names of at most 256
+characters can be supported. If a file name does not fit in the space
+provided, an extended record is used to store the file name.
+
+The field @samp{linkname} does not use the prefix to produce a file name. If
+the link name does not fit in the 100 characters provided, an extended
+record is used to store the link name.
+
+The field @samp{mode} provides 12 access permission bits. The following
+table shows the symbolic name of each bit and its octal value:
+
+@multitable {Bit Name} {Value} {Bit Name} {Value} {Bit Name} {Value}
+@headitem Bit Name @tab Value @tab Bit Name @tab Value @tab Bit Name @tab Value
+@item S_ISUID @tab 04000 @tab S_ISGID @tab 02000 @tab S_ISVTX @tab 01000
+@item S_IRUSR @tab 00400 @tab S_IWUSR @tab 00200 @tab S_IXUSR @tab 00100
+@item S_IRGRP @tab 00040 @tab S_IWGRP @tab 00020 @tab S_IXGRP @tab 00010
+@item S_IROTH @tab 00004 @tab S_IWOTH @tab 00002 @tab S_IXOTH @tab 00001
+@end multitable
+
+@anchor{ustar-uid-gid}
+The fields @samp{uid} and @samp{gid} are the user and group IDs of the owner
+and group of the file, respectively. If the file uid or gid are greater than
+2_097_151 @w{(octal 7_777_777)}, an extended record is used to store the uid
+or gid.
+
+The field @samp{size} contains the octal representation of the size of the
+file in bytes. If the field @samp{typeflag} specifies a file of type '0'
+(regular file) or '7' (high performance regular file), the number of logical
+records following the header is @w{(size / 512)} rounded up to the next
+integer. For all other values of typeflag, tarlz either sets the size field
+to 0 or ignores it, and does not store or expect any logical records
+following the header. If the file size is larger than 8_589_934_591 bytes
+@w{(octal 77_777_777_777)}, an extended record is used to store the file size.
+
+@anchor{ustar-mtime}
+The field @samp{mtime} contains the octal representation of the modification
+time of the file at the time it was archived, obtained from the function
+@samp{stat}. If the modification time is negative or larger than
+8_589_934_591 @w{(octal 77_777_777_777)} seconds since the epoch, an extended
+record is used to store the modification time. The ustar range of mtime goes
+from @w{@samp{1970-01-01 00:00:00 UTC}} to @w{@samp{2242-03-16 12:56:31 UTC}}.
+
+The field @samp{chksum} contains the octal representation of the value of
+the simple sum of all bytes in the header logical record. Each byte in the
+header is treated as an unsigned value. When calculating the checksum, the
+chksum field is treated as if it were all space characters.
+
+The field @samp{typeflag} contains a single character specifying the type of
+file archived:
+
+@table @code
+@item '0'
+Regular file.
+
+@item '1'
+Hard link to another file, of any type, previously archived. Hard links must
+not contain file data.
+
+@item '2'
+Symbolic link.
+
+@item '3', '4'
+Character special file and block special file respectively. In this case the
+fields @samp{devmajor} and @samp{devminor} contain information defining the
+device in unspecified format.
+
+@item '5'
+Directory.
+
+@item '6'
+FIFO special file.
+
+@item '7'
+Reserved to represent a file to which an implementation has associated some
+high-performance attribute (contiguous file). Tarlz treats this type of file
+as a regular file (type 0).
+
+@end table
+
+The field @samp{magic} contains the ASCII null-terminated string "ustar".
+The field @samp{version} contains the characters "00" (0x30,0x30). The
+fields @samp{uname} and @samp{gname} are null-terminated character strings
+except when all characters in the array contain non-null characters
+including the last character. Each numeric field contains a leading space-
+or zero-filled, optionally null-terminated octal number using digits from
+the ISO/IEC 646:1991 (ASCII) standard. Tarlz is able to decode numeric
+fields 1 byte longer than standard ustar by not requiring a terminating null
+character.
+
+
+@node Amendments to pax format
+@chapter The reasons for the differences with pax
+@cindex Amendments to pax format
+
+Tarlz creates safe archives that allow the reliable detection of invalid or
+corrupt metadata during decoding even when the integrity checking of lzip
+can't be used because the lzip members are only decompressed partially, as
+it happens in parallel @option{--diff}, @option{--list}, and @option{--extract}.
+In order to achieve this goal and avoid some other flaws in the pax format,
+tarlz makes some changes to the variant of the pax format that it uses. This
+chapter describes these changes and the concrete reasons to implement them.
+
+@sp 1
+@anchor{crc32}
+@section Add a CRC of the extended records
+
+The POSIX pax format has a serious flaw. The metadata stored in pax extended
+records are not protected by any kind of check sequence. Corruption in a
+long file name may cause the extraction of the file in the wrong place
+without warning. Corruption in a large file size may cause the truncation of
+the file or the appending of garbage to the file, both followed by a
+spurious warning about a corrupt header far from the place of the undetected
+corruption.
+
+Metadata like file name and file size must be always protected in an archive
+format because of the adverse effects of undetected corruption in them,
+potentially much worse than undetected corruption in the data. Even more so
+in the case of pax because the amount of metadata it stores is potentially
+large, making undetected corruption and archiver misbehavior more probable.
+
+Headers and metadata must be protected separately from data because the
+integrity checking of lzip may not be able to detect the corruption before
+the metadata have been used, for example, to create a new file in the wrong
+place.
+
+Because of the above, tarlz protects the extended records with a Cyclic
+Redundancy Check (CRC) in a way compatible with standard tar tools.
+@xref{key_crc32}.
+
+@sp 1
+@anchor{flawed-compat}
+@section Remove flawed backward compatibility
+
+In order to allow the extraction of pax archives by a tar utility conforming
+to the POSIX-2:1993 standard, POSIX.1-2008 recommends selecting extended
+header field values that allow such tar to create a regular file containing
+the extended header records as data. This approach is broken because if the
+extended header is needed because of a long file name, the fields
+@samp{name} and @samp{prefix} are unable to contain the full file name.
+(Some tar implementations store the truncated name in the field @samp{name}
+alone, truncating the name to only 100 bytes instead of 256). Therefore the
+files corresponding to both the extended header and the overridden ustar
+header are extracted using truncated file names, perhaps overwriting
+existing files or directories. It may be a security risk to extract a file
+with a truncated file name.
+
+To avoid this problem, tarlz writes extended headers with all fields zeroed
+except @samp{size} (which contains the size of the extended records),
+@samp{chksum}, @samp{typeflag}, @samp{magic}, and @samp{version}. In
+particular, tarlz sets the fields @samp{name} and @samp{prefix} to zero.
+This prevents old tar programs from extracting the extended records as a
+file in the wrong place. Tarlz also sets to zero those fields of the ustar
+header overridden by extended records. Finally, tarlz skips members with
+zeroed @samp{name} and @samp{prefix} when decoding, except when listing.
+This is needed to detect certain format violations during parallel
+extraction.
+
+If an extended header is required for any reason (for example a file size of
+@w{8 GiB} or larger, or a link name longer than 100 bytes), tarlz also moves
+the file name to the extended records to prevent an ustar tool from trying
+to extract the file or link. This also makes it easier to detect, during
+parallel decoding, a tar member split between two lzip members at the boundary
+between the extended header and the ustar header.
+
+@sp 1
+@section As simple as possible (but not simpler)
+
+The tarlz format is mainly ustar. Extended pax headers are used only when
+needed because the length of a file name or link name, or the size or other
+attribute of a file exceeds the limits of the ustar format. Adding @w{1 KiB}
+of extended header and records to each member just to save subsecond
+timestamps seems wasteful for a backup format. Moreover, minimizing the
+overhead may help recovering the archive with lziprecover in case of
+corruption.
+
+Global pax headers are tolerated, but not supported; they are parsed and
+ignored. Some operations may not behave as expected if the archive contains
+global headers.
+
+@sp 1
+@section Improve reproducibility
+
+Pax includes by default the process ID of the pax process in the ustar name
+of the extended headers, making the archive not reproducible. Tarlz stores
+the true name of the file just once, either in the ustar header or in the
+extended records, making it easier to produce reproducible archives.
+
+Pax allows an extended record to have length x-1 or x if x is a power of
+ten; @samp{99<97_bytes>} or @samp{100<97_bytes>}. Tarlz minimizes the length
+of the record and always produces a length of x-1 in these cases.
+
+@sp 1
+@section No data in hard links
+
+Tarlz does not allow data in hard link members. The data (if any) must be in
+the member determining the type of the file (which can't be a link). If all
+the names of a file are stored as hard links, the type of the file is lost.
+Not allowing data in hard links also prevents invalid actions like
+extracting file data for a hard link to a symbolic link or to a directory.
+
+@sp 1
+@section Avoid misconversions to/from UTF-8
+
+There is no portable way to tell which charset a text string is encoded in.
+Therefore, tarlz stores all fields representing text strings unmodified,
+without conversion to UTF-8 nor any other transformation. This prevents
+accidental double UTF-8 conversions. If the need arises this behavior will
+be adjusted with a command-line option in the future.
+
+
+@node Program design
+@chapter Internal structure of tarlz
+@cindex program design
+
+The parts of tarlz related to sequential processing of the archive are more
+or less similar to any other tar and won't be described here. The interesting
+parts described here are those related to multi-threaded processing.
+
+The structure of the part of tarlz performing multi-threaded archive
+creation is somewhat similar to that of
+@uref{http://www.nongnu.org/lzip/plzip.html#Program-design,,plzip} with the
+added complication of the solidity levels.
+@ifnothtml
+@xref{Program design,,,plzip}.
+@end ifnothtml
+A grouper thread and several worker threads are created, with the main
+thread acting as the muxer (multiplexer) thread. A "packet courier" takes
+care of data transfers among threads and limits the maximum number of data
+blocks (packets) being processed simultaneously.
+
+The grouper traverses the directory tree, groups together the metadata of
+the files to be archived in each lzip member, and distributes them to the
+workers. The workers compress the metadata received from the grouper along
+with the file data read from the file system. The muxer collects processed
+packets from the workers, and writes them to the archive.
+
+@verbatim
+.--------.
+| data|---> to each worker below
+| | .------------.
+| file | ,-->| worker 0 |--,
+| system | | `------------' |
+| | .---------. | .------------. | .-------. .---------.
+|metadata|--->| grouper |-+-->| worker 1 |--+-->| muxer |-->| archive |
+`--------' `---------' | `------------' | `-------' `---------'
+ | ... |
+ | .------------. |
+ `-->| worker N-1 |--'
+ `------------'
+@end verbatim
+
+Decoding an archive is somewhat similar to how plzip decompresses a regular
+file to standard output, with the differences that only messages, not the
+data, are written to stdout/stderr, and that each worker may
+access files in the file system either to read them (diff) or write them
+(extract). As in plzip, each worker reads members directly from the archive.
+
+@verbatim
+.--------.
+| file |<---> data to/from each worker below
+| system |
+`--------' .------------.
+ ,-->| worker 0 |--,
+ | `------------' |
+.---------. | .------------. | .-------. .--------.
+| archive |-+-->| worker 1 |--+-->| muxer |-->| stdout |
+`---------' | `------------' | `-------' | stderr |
+ | ... | `--------'
+ | .------------. |
+ `-->| worker N-1 |--'
+ `------------'
+@end verbatim
+
+As misaligned tar.lz archives can't be decoded in parallel, and the
+misalignment can't be detected until after decoding has started, a
+"mastership request" mechanism has been designed that allows the decoding to
+continue instead of signalling an error.
+
+During parallel decoding, if a worker finds a misalignment, it requests
+mastership to decode the rest of the archive. When mastership is requested,
+an error_member_id is set, and all subsequently received packets with
+member_id > error_member_id are rejected. All workers requesting mastership
+are blocked at the request_mastership call until mastership is granted.
+Mastership is granted to the delivering worker when its queue is empty to
+make sure that all preceding packets have been processed. When mastership is
+granted, all packets are deleted and all subsequently received packets not
+coming from the master are rejected.
+
+If a worker can't continue decoding for any cause (for example lack of
+memory or finding a split tar member at the beginning of a lzip member), it
+requests mastership to print an error and terminate the program. Only if
+some other worker requests mastership in a previous lzip member can this
+error be avoided.
+
+
+@node Multi-threaded decoding
+@chapter Limitations of parallel tar decoding
+@cindex parallel tar decoding
+
+Safely decoding an arbitrary tar archive in parallel is only possible if one
+decodes the headers sequentially first. For example, if a tar archive
+containing another tar archive is decoded starting from some position other
+than the beginning, there is no way to know if the first header found there
+belongs to the outer tar archive or to the inner tar archive. Tar is an
+inherently serial format; it was designed for tapes.
+
+The pax format is even more serial than the ustar format. Two headers need
+to be decoded sequentially for each file. The extended header may even need
+parsing to reveal something as basic as file size. If a thread decodes the
+ustar header skipping the preceding extended header, it may extract a file
+of incorrect size at the wrong place. Moreover, a pax archive with global
+headers can't be decoded in parallel because each thread can't know about
+the global headers decoded by other threads.
+
+In the case of compressed tar archives, the start of each compressed block
+determines one point through which the tar archive can be decoded in
+parallel. Therefore, in tar.lz archives the decoding operations can't be
+parallelized if the tar members are not aligned with the lzip members. Tar
+archives compressed with plzip can't be decoded in parallel because tar and
+plzip do not have a way to align both sets of members. Certainly one can
+decompress one such archive with a multi-threaded tool like plzip, but the
+increase in speed is not as large as it could be because plzip must
+serialize the decompressed data and pass them to tar, which decodes them
+sequentially, one tar member at a time.
+
+On the other hand, if the tar.lz archive is created with a tool like tarlz,
+which can guarantee the alignment between tar members and lzip members
+because it controls both archiving and compression, then the lzip format
+becomes an indexed layer on top of the tar archive which makes it possible
+to decode it safely in parallel.
+
+Tarlz is able to automatically decode aligned and unaligned multimember
+tar.lz archives, keeping backwards compatibility. If tarlz finds a member
+misalignment during multi-threaded decoding, it switches to single-threaded
+mode and continues decoding the archive.
+
+If the files in the archive are large, multi-threaded @option{--list} on a
+regular (seekable) tar.lz archive can be hundreds of times faster than
+sequential @option{--list} because, in addition to using several processors,
+it only needs to decompress part of each lzip member. See the following
+example listing the Silesia corpus on a dual core machine:
+
+@example
+tarlz -9 --no-solid -cf silesia.tar.lz silesia
+time lzip -cd silesia.tar.lz | tar -tf - (5.032s)
+time plzip -cd silesia.tar.lz | tar -tf - (3.256s)
+time tarlz -tf silesia.tar.lz (0.020s)
+@end example
+
+On the other hand, multi-threaded @option{--list} won't detect corruption in
+the tar member data because it only decodes the part of each lzip member
+corresponding to the tar member header. This is another reason why the tar
+headers must provide their own integrity checking.
+
+@sp 1
+@anchor{mt-extraction}
+@section Limitations of multi-threaded extraction
+
+Multi-threaded extraction may produce different output than single-threaded
+extraction in some cases:
+
+During multi-threaded extraction, several independent threads are
+simultaneously reading the archive and creating files in the file system.
+The archive is not read sequentially. As a consequence, any error or
+weirdness in the archive (like a corrupt member or an end-of-archive block
+in the middle of the archive) usually won't be detected until part of the
+archive beyond that point has been processed.
+
+If the archive contains two or more tar members with the same name,
+single-threaded extraction extracts the members in the order they appear in
+the archive and leaves in the file system the last version of the file. But
+multi-threaded extraction may extract the members in any order and leave in
+the file system any version of the file nondeterministically. It is
+unspecified which of the tar members is extracted.
+
+If the same file is extracted through several paths (different member names
+resolve to the same file in the file system), the result is undefined.
+(Probably the resulting file will be mangled).
+
+Extraction of a hard link may fail if it is extracted before the file it
+links to.
+
+
+@node Minimum archive sizes
+@chapter Minimum archive sizes required for multi-threaded block compression
+@cindex minimum archive sizes
+
+When creating or appending to a compressed archive using multi-threaded
+block compression, tarlz puts tar members together in blocks and compresses
+as many blocks simultaneously as worker threads are chosen, creating a
+multimember compressed archive.
+
+For this to work as expected (and roughly multiply the compression speed by
+the number of available processors), the uncompressed archive must be at
+least as large as the number of worker threads times the block size
+(@pxref{--data-size}). Otherwise some processors do not get any data to
+compress, and compression is proportionally slower. The maximum speed increase
+achievable on a given archive is limited by the ratio
+@w{(uncompressed_size / data_size)}. For example, a tarball the size of gcc
+or linux scales up to 10 or 14 processors at level -9.
+
+The following table shows the minimum uncompressed archive size needed for
+full use of N processors at a given compression level, using the default
+data size for each level:
+
+@multitable {Processors} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB} {512 MiB}
+@headitem Processors @tab 2 @tab 4 @tab 8 @tab 16 @tab 64 @tab 256
+@item Level
+@item -0 @tab 2 MiB @tab 4 MiB @tab 8 MiB @tab 16 MiB @tab 64 MiB @tab 256 MiB
+@item -1 @tab 4 MiB @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 128 MiB @tab 512 MiB
+@item -2 @tab 6 MiB @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 192 MiB @tab 768 MiB
+@item -3 @tab 8 MiB @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 256 MiB @tab 1 GiB
+@item -4 @tab 12 MiB @tab 24 MiB @tab 48 MiB @tab 96 MiB @tab 384 MiB @tab 1.5 GiB
+@item -5 @tab 16 MiB @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 512 MiB @tab 2 GiB
+@item -6 @tab 32 MiB @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 1 GiB @tab 4 GiB
+@item -7 @tab 64 MiB @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 2 GiB @tab 8 GiB
+@item -8 @tab 96 MiB @tab 192 MiB @tab 384 MiB @tab 768 MiB @tab 3 GiB @tab 12 GiB
+@item -9 @tab 128 MiB @tab 256 MiB @tab 512 MiB @tab 1 GiB @tab 4 GiB @tab 16 GiB
+@end multitable
+
+
+@node Examples
+@chapter A small tutorial with examples
+@cindex examples
+
+@noindent
+Example 1: Create a multimember compressed archive @samp{archive.tar.lz}
+containing files @samp{a}, @samp{b} and @samp{c}.
+
+@example
+tarlz -cf archive.tar.lz a b c
+@end example
+
+@sp 1
+@noindent
+Example 2: Append files @samp{d} and @samp{e} to the multimember compressed
+archive @samp{archive.tar.lz}.
+
+@example
+tarlz -rf archive.tar.lz d e
+@end example
+
+@sp 1
+@noindent
+Example 3: Create a solidly compressed appendable archive
+@samp{archive.tar.lz} containing files @samp{a}, @samp{b} and @samp{c}.
+Then append files @samp{d} and @samp{e} to the archive.
+
+@example
+tarlz --asolid -cf archive.tar.lz a b c
+tarlz --asolid -rf archive.tar.lz d e
+@end example
+
+@sp 1
+@noindent
+Example 4: Create a compressed appendable archive containing directories
+@samp{dir1}, @samp{dir2} and @samp{dir3} with a separate lzip member per
+directory. Then append files @samp{a}, @samp{b}, @samp{c}, @samp{d} and
+@samp{e} to the archive, all of them contained in a single lzip member.
+The resulting archive @samp{archive.tar.lz} contains 5 lzip members
+(including the end-of-archive member).
+
+@example
+tarlz --dsolid -cf archive.tar.lz dir1 dir2 dir3
+tarlz --asolid -rf archive.tar.lz a b c d e
+@end example
+
+@sp 1
+@noindent
+Example 5: Create a solidly compressed archive @samp{archive.tar.lz}
+containing files @samp{a}, @samp{b} and @samp{c}. Note that no more
+files can be later appended to the archive.
+
+@example
+tarlz --solid -cf archive.tar.lz a b c
+@end example
+
+@sp 1
+@noindent
+Example 6: Extract all files from archive @samp{archive.tar.lz}.
+
+@example
+tarlz -xf archive.tar.lz
+@end example
+
+@sp 1
+@noindent
+Example 7: Extract files @samp{a} and @samp{c}, and the whole tree under
+directory @samp{dir1} from archive @samp{archive.tar.lz}.
+
+@example
+tarlz -xf archive.tar.lz a c dir1
+@end example
+
+@sp 1
+@noindent
+Example 8: Copy the contents of directory @samp{sourcedir} to the directory
+@samp{destdir}.
+
+@example
+tarlz -C sourcedir --uncompressed -cf - . | tarlz -C destdir -xf -
+@end example
+
+@sp 1
+@noindent
+Example 9: Compress the existing POSIX archive @samp{archive.tar} and write
+the output to @samp{archive.tar.lz}. Compress each member individually for
+maximum availability. (If one member in the compressed archive gets damaged,
+the other members can still be extracted).
+
+@example
+tarlz -z --no-solid archive.tar
+@end example
+
+@sp 1
+@noindent
+Example 10: Compress the archive @samp{archive.tar} and write the output to
+@samp{foo.tar.lz}.
+
+@example
+tarlz -z -o foo.tar.lz archive.tar
+@end example
+
+@sp 1
+@noindent
+Example 11: Concatenate and compress two archives @samp{archive1.tar} and
+@samp{archive2.tar}, and write the output to @samp{foo.tar.lz}.
+
+@example
+tarlz -A archive1.tar archive2.tar | tarlz -z -o foo.tar.lz
+@end example
+
+
+@node Problems
+@chapter Reporting bugs
+@cindex bugs
+@cindex getting help
+
+There are probably bugs in tarlz. There are certainly errors and
+omissions in this manual. If you report them, they will get fixed. If
+you don't, no one will ever know about them and they will remain unfixed
+for all eternity, if not longer.
+
+If you find a bug in tarlz, please send electronic mail to
+@email{lzip-bug@@nongnu.org}. Include the version number, which you can
+find by running @w{@samp{tarlz --version}} and
+@w{@samp{tarlz -v --check-lib}}.
+
+
+@node Concept index
+@unnumbered Concept index
+
+@printindex cp
+
+@bye
diff --git a/exclude.cc b/exclude.cc
new file mode 100644
index 0000000..44a53a5
--- /dev/null
+++ b/exclude.cc
@@ -0,0 +1,53 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <fnmatch.h>
+
+#include "tarlz.h"
+
+
+// File-scope state of the exclusion list. It is filled by
+// Exclude::add_pattern and consulted by Exclude::excluded, which passes each
+// entry to fnmatch as a pattern.
+namespace Exclude {
+
+std::vector< std::string > patterns; // list of patterns
+
+} // end namespace Exclude
+
+
+// Append 'arg' to the list of exclusion patterns.
+void Exclude::add_pattern( const std::string & arg )
+  {
+  patterns.push_back( arg );
+  }
+
+
+/* Return true if 'filename', or any tail of it that starts at the beginning
+   of a path component, matches one of the exclusion patterns.
+   A match ignores a trailing sequence starting with '/' in the tested name. */
+bool Exclude::excluded( const char * const filename )
+  {
+  const unsigned count = patterns.size();
+  if( count == 0 ) return false;
+  const char * suffix = filename;	// tail of filename currently tested
+  while( true )
+    {
+    for( unsigned idx = 0; idx < count; ++idx )
+      {
+      const std::string & pat = patterns[idx];
+#ifdef FNM_LEADING_DIR
+      const bool hit = ( fnmatch( pat.c_str(), suffix, FNM_LEADING_DIR ) == 0 );
+#else
+      // emulate FNM_LEADING_DIR: also accept anything below a matching dir
+      const bool hit = ( fnmatch( pat.c_str(), suffix, 0 ) == 0 ||
+                         fnmatch( ( pat + "/*" ).c_str(), suffix, 0 ) == 0 );
+#endif
+      if( hit ) return true;
+      }
+    while( *suffix != 0 && *suffix != '/' ) ++suffix;	// skip component
+    while( *suffix == '/' ) ++suffix;			// skip slashes
+    if( *suffix == 0 ) break;				// no more components
+    }
+  return false;
+  }
diff --git a/extended.cc b/extended.cc
new file mode 100644
index 0000000..0dfba9b
--- /dev/null
+++ b/extended.cc
@@ -0,0 +1,422 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cctype>
+#include <cerrno>
+#include <cstdio>
+
+#include "tarlz.h"
+
+
+// CRC calculator used in this file to write and to verify the value stored
+// in the 'GNU.crc32' extended record.
+// NOTE(review): the 'true' argument presumably selects the CRC32-C
+// polynomial; confirm against the CRC32 class declaration in tarlz.h.
+const CRC32 crc32c( true );
+
+
+namespace {
+
+/* Return the total size of the extended record
+   "length keyword=value\n", where 'length' is the decimal size of the
+   whole record including the digits of 'length' itself.
+   The shortest valid length is chosen; "99<97_bytes>" is preferred to
+   "100<97_bytes>". */
+unsigned record_size( const unsigned keyword_size, const unsigned value_size )
+  {
+  // everything except the length digits: ' ' + keyword + '=' + value + '\n'
+  const unsigned payload = keyword_size + value_size + 3;
+  // first guess of the full size, using the digits needed for the payload
+  const unsigned estimate = payload + decimal_digits( payload );
+  return payload + decimal_digits( estimate );
+  }
+
+
+/* Parse an unsigned decimal number from the first 'size' characters of
+   'ptr', skipping leading whitespace.
+   Return the number and set '*tailp' to the first character not parsed.
+   Return -1 and set '*tailp' to 'ptr' if there are no digits inside the
+   window, or if the number exceeds 'limit' or LLONG_MAX.
+   The overflow test is done before multiplying, so that numbers of 2^64 or
+   more can't wrap around into the valid range, and no character at or
+   beyond ptr[size] is ever examined. */
+long long parse_decimal( const char * const ptr, const char ** const tailp,
+                         const int size,
+                         const unsigned long long limit = LLONG_MAX )
+  {
+  const unsigned long long llmax = LLONG_MAX;
+  const unsigned long long max = ( limit < llmax ) ? limit : llmax;
+  unsigned long long result = 0;
+  int i = 0;
+  while( i < size && std::isspace( (unsigned char)ptr[i] ) ) ++i;
+  if( i >= size || !std::isdigit( (unsigned char)ptr[i] ) )
+    { *tailp = ptr; return -1; }
+  for( ; i < size && std::isdigit( (unsigned char)ptr[i] ); ++i )
+    {
+    const unsigned digit = ptr[i] - '0';
+    // reject if result * 10 + digit > max, without computing the product
+    if( digit > max || result > ( max - digit ) / 10 )	// overflow
+      { *tailp = ptr; return -1; }
+    result = result * 10 + digit;
+    }
+  *tailp = ptr + i;
+  return result;
+  }
+
+
+/* Convert the 8 hexadecimal digits at 'ptr' (upper or lower case) to a
+   32-bit value. Return 0 if any of the 8 characters is not a hex digit. */
+uint32_t parse_record_crc( const char * const ptr )
+  {
+  uint32_t value = 0;
+  for( const char * p = ptr; p < ptr + 8; ++p )
+    {
+    const char ch = *p;
+    unsigned nibble;
+    if( ch >= '0' && ch <= '9' ) nibble = ch - '0';
+    else if( ch >= 'A' && ch <= 'F' ) nibble = ch - 'A' + 10;
+    else if( ch >= 'a' && ch <= 'f' ) nibble = ch - 'a' + 10;
+    else return 0;			// invalid digit in crc string
+    value = ( value << 4 ) + nibble;
+    }
+  return value;
+  }
+
+
+// Return the upper case hex digit for 'value', or 0 if 'value' > 15.
+unsigned char xdigit( const unsigned value )
+  {
+  static const char digits[] = "0123456789ABCDEF";
+  if( value > 15 ) return 0;
+  return digits[value];
+  }
+
+// Write the low 'size' hex digits of 'num' to buf[0..size-1], zero padded,
+// most significant digit first. No terminating null is written.
+void print_hex( char * const buf, int size, unsigned long long num )
+  {
+  for( int i = size - 1; i >= 0; --i, num >>= 4 )
+    buf[i] = xdigit( num & 0x0F );
+  }
+
+// Write the low 'size' decimal digits of 'num' to buf[0..size-1], zero
+// padded, most significant digit first. No terminating null is written.
+void print_decimal( char * const buf, int size, unsigned long long num )
+  {
+  for( int i = size - 1; i >= 0; --i )
+    {
+    buf[i] = num % 10 + '0';
+    num /= 10;
+    }
+  }
+
+/* Write the beginning of an extended record, "size keyword=", to 'buf'.
+   Return the number of characters written, which is the offset in 'buf'
+   where the value must be written. */
+int print_size_keyword( char * const buf, const int size, const char * keyword )
+  {
+  const int digits = decimal_digits( size );
+  print_decimal( buf, digits, size );
+  char * out = buf + digits;
+  *out++ = ' ';
+  for( ; *keyword; ++keyword ) *out++ = *keyword;
+  *out++ = '=';
+  return out - buf;
+  }
+
+/* The three overloads below write one complete extended record,
+   "size keyword=value\n", to 'buf' for a string, an unsigned number, or an
+   Etime value. Each one returns true if exactly 'size' characters were
+   written, i.e., if the precomputed record size matches the record. */
+bool print_record( char * const buf, const int size, const char * keyword,
+                   const std::string & value )
+  {
+  const int start = print_size_keyword( buf, size, keyword );
+  std::memcpy( buf + start, value.c_str(), value.size() );
+  const int end = start + value.size();		// position of the newline
+  buf[end] = '\n';
+  return end + 1 == size;
+  }
+
+bool print_record( char * const buf, const int size, const char * keyword,
+                   const unsigned long long value )
+  {
+  char * const out = buf + print_size_keyword( buf, size, keyword );
+  const int digits = decimal_digits( value );
+  print_decimal( out, digits, value );
+  out[digits] = '\n';
+  return ( out + digits + 1 ) - buf == size;
+  }
+
+bool print_record( char * const buf, const int size, const char * keyword,
+                   const Etime & value )
+  {
+  char * out = buf + print_size_keyword( buf, size, keyword );
+  out += value.print( out );
+  *out++ = '\n';
+  return out - buf == size;
+  }
+
+} // end namespace
+
+
+/* Return the number of characters that print() writes: an optional '-',
+   the digits of sec_ and, if nsec_ is in the range [1, 999999999], a
+   decimal point followed by exactly 9 fractional digits.
+   This must stay in sync with print(); the record writers compare the
+   number of characters written with the size computed from this value. */
+unsigned Etime::decimal_size() const
+  {
+  unsigned size = 1 + ( sec_ < 0 );	// first digit + negative sign
+  for( long long n = sec_; n >= 10 || n <= -10; n /= 10 ) ++size;
+  if( nsec_ > 0 && nsec_ <= 999999999 )
+    size += 1 + 9;			// decimal point + 9 fractional digits
+  return size;
+  }
+
+/* Write the time to 'buf' as "[-]seconds[.nnnnnnnnn]" without a terminating
+   null and return the number of characters written.
+   The fraction is always written with 9 digits (zero padded on the left).
+   Dropping the leading zeros, as was done before, would change the value;
+   5000000 ns (0.005 s) would be written as ".5000000" and read back as
+   0.5 s. The output is unchanged for nsec_ >= 100000000.
+   The characters are produced in reverse order and swapped at the end. */
+unsigned Etime::print( char * const buf ) const
+  {
+  int len = 0;
+  if( nsec_ > 0 && nsec_ <= 999999999 )
+    { int n = nsec_;
+      for( int i = 0; i < 9; ++i ) { buf[len++] = n % 10 + '0'; n /= 10; }
+      buf[len++] = '.'; }
+  long long n = sec_;
+  // ( on - 10 * n ) is the last digit, negative if sec_ is negative
+  do { long long on = n; n /= 10; buf[len++] = llabs( on - 10 * n ) + '0'; }
+    while( n != 0 );
+  if( sec_ < 0 ) buf[len++] = '-';
+  for( int i = 0; i < len / 2; ++i ) std::swap( buf[i], buf[len-i-1] );
+  return len;
+  }
+
+/* Parse a decimal time "seconds[.fraction]" (the seconds as accepted by
+ strtoll) starting at 'ptr' and spanning no more than 'size' characters.
+ On success store seconds and nanoseconds in this object, set '*tailp'
+ (if 'tailp' is not null) to the first character not parsed, and return
+ true. On error return false and leave this object unmodified. */
+bool Etime::parse( const char * const ptr, const char ** const tailp,
+ const int size )
+ {
+ char * tail;
+ errno = 0; // strtoll reports a range error only through errno
+ long long s = strtoll( ptr, &tail, 10 );
+ // fail if there are no digits, if strtoll went past 'size', on range
+ // error, or if the seconds are not followed by null, newline, or '.'
+ if( tail == ptr || tail - ptr > size || errno ||
+ ( *tail != 0 && *tail != '\n' && *tail != '.' ) ) return false;
+ int ns = 0;
+ if( *tail == '.' ) // parse nanoseconds and any extra digits
+ {
+ ++tail;
+ // at least one digit is required after the decimal point
+ if( tail - ptr >= size || !std::isdigit( (unsigned char)*tail ) )
+ return false;
+ // the first fractional digit is worth 100000000 ns; 'factor' becomes 0
+ // after 9 digits, so any further digits are consumed but add nothing
+ for( int factor = 100000000;
+ tail - ptr < size && std::isdigit( (unsigned char)*tail );
+ ++tail, factor /= 10 )
+ ns += factor * ( *tail - '0' );
+ }
+ sec_ = s; nsec_ = ns; if( tailp ) *tailp = tail;
+ return true;
+ }
+
+
+// keywords already reported by unknown_keyword, so that each one is
+// diagnosed only once
+std::vector< std::string > Extended::unknown_keywords;
+// template of the 22-byte CRC record; format_block appends it to the other
+// records and then overwrites the 8 zeros with the hex digits of the CRC
+const std::string Extended::crc_record( "22 GNU.crc32=00000000\n" );
+
+/* Compute the size of every extended record that has to be written (0 for
+ the records not needed) and, from them, edsize_, padded_edsize_ and
+ full_size_. Set full_size_ to -3 and return early if the path, the
+ linkpath, the sum of the records, or the padded sum exceeds
+ max_edata_size. */
+void Extended::calculate_sizes() const
+ {
+ if( linkpath_.size() > max_edata_size || path_.size() > max_edata_size )
+ { full_size_ = -3; return; }
+ // the first argument of record_size is the length of the keyword
+ // ("linkpath", "path", "size", "uid", "gid", "atime", "mtime")
+ linkpath_recsize_ = linkpath_.size() ? record_size( 8, linkpath_.size() ) : 0;
+ path_recsize_ = path_.size() ? record_size( 4, path_.size() ) : 0;
+ file_size_recsize_ =
+ ( file_size_ > 0 ) ? record_size( 4, decimal_digits( file_size_ ) ) : 0;
+ uid_recsize_ = ( uid_ >= 0 ) ? record_size( 3, decimal_digits( uid_ ) ) : 0;
+ gid_recsize_ = ( gid_ >= 0 ) ? record_size( 3, decimal_digits( gid_ ) ) : 0;
+ atime_recsize_ =
+ atime_.out_of_ustar_range() ? record_size( 5, atime_.decimal_size() ) : 0;
+ mtime_recsize_ =
+ mtime_.out_of_ustar_range() ? record_size( 5, mtime_.decimal_size() ) : 0;
+ // the CRC record is always counted, even if no other record is needed
+ const long long tmp = linkpath_recsize_ + path_recsize_ +
+ file_size_recsize_ + uid_recsize_ + gid_recsize_ +
+ atime_recsize_ + mtime_recsize_ + crc_record.size();
+ if( tmp > max_edata_size ) { full_size_ = -3; return; }
+ edsize_ = tmp;
+ padded_edsize_ = round_up( edsize_ );
+ if( padded_edsize_ > max_edata_size ) { full_size_ = -3; return; }
+ // extended header + padded extended records
+ full_size_ = header_size + padded_edsize_;
+ }
+
+
+/* Print a diagnostic for the keyword at the beginning of 'buf', unless the
+   same keyword has been reported before. The keyword extends up to the
+   first '=' or, if there is none, up to 'size' characters. */
+void Extended::unknown_keyword( const char * const buf, const int size ) const
+  {
+  const char * const end = buf + size;
+  const char * eq = buf;			// will point to '=' or to end
+  while( eq < end && *eq != '=' ) ++eq;
+  const std::string keyword( buf, eq );
+  std::vector< std::string >::const_iterator it = unknown_keywords.begin();
+  for( ; it != unknown_keywords.end(); ++it )
+    if( *it == keyword ) return;		// already reported
+  unknown_keywords.push_back( keyword );
+  print_error( 0, "Ignoring unknown extended header keyword '%s'",
+               keyword.c_str() );
+  }
+
+
+/* Format the extended header and the extended records into 'rbuf'.
+ The records are written in the order path, linkpath, size, uid, gid,
+ atime, mtime, GNU.crc32, followed by zero padding up to padded_edsize_.
+ Return the size of the extended block, or 0 if empty.
+ Return -1 if error, -2 if out of memory, -3 if block too long. */
+int Extended::format_block( Resizable_buffer & rbuf ) const
+ {
+ const int bufsize = full_size(); // recalculate sizes if needed
+ if( bufsize <= 0 ) return bufsize; // error or no extended data
+ if( !rbuf.resize( bufsize ) ) return -2; // extended block buffer
+ uint8_t * const header = rbuf.u8(); // extended header
+ char * const buf = rbuf() + header_size; // extended records
+ init_tar_header( header );
+ header[typeflag_o] = tf_extended; // fill only required fields
+ print_octal( header + size_o, size_l - 1, edsize_ );
+ // the checksum is computed last, after the other fields are in place
+ print_octal( header + chksum_o, chksum_l - 1, ustar_chksum( header ) );
+
+ // each record is written only if its precomputed size is nonzero;
+ // print_record fails if the record written does not have that size
+ if( path_recsize_ && !print_record( buf, path_recsize_, "path", path_ ) )
+ return -1;
+ int pos = path_recsize_;
+ if( linkpath_recsize_ &&
+ !print_record( buf + pos, linkpath_recsize_, "linkpath", linkpath_ ) )
+ return -1;
+ pos += linkpath_recsize_;
+ if( file_size_recsize_ &&
+ !print_record( buf + pos, file_size_recsize_, "size", file_size_ ) )
+ return -1;
+ pos += file_size_recsize_;
+ if( uid_recsize_ && !print_record( buf + pos, uid_recsize_, "uid", uid_ ) )
+ return -1;
+ pos += uid_recsize_;
+ if( gid_recsize_ && !print_record( buf + pos, gid_recsize_, "gid", gid_ ) )
+ return -1;
+ pos += gid_recsize_;
+ if( atime_recsize_ &&
+ !print_record( buf + pos, atime_recsize_, "atime", atime_ ) )
+ return -1;
+ pos += atime_recsize_;
+ if( mtime_recsize_ &&
+ !print_record( buf + pos, mtime_recsize_, "mtime", mtime_ ) )
+ return -1;
+ pos += mtime_recsize_;
+ // append the CRC record template (with the CRC digits set to zero)
+ const unsigned crc_size = Extended::crc_record.size();
+ std::memcpy( buf + pos, Extended::crc_record.c_str(), crc_size );
+ pos += crc_size;
+ if( pos != edsize_ ) return -1;
+ // overwrite the 8 zeros, which end just before the final newline, with
+ // the hex digits of the CRC
+ // NOTE(review): windowed_crc presumably excludes the 8 CRC digits from
+ // the calculation; confirm against its definition.
+ print_hex( buf + edsize_ - 9, 8,
+ crc32c.windowed_crc( (const uint8_t *)buf, edsize_ - 9, edsize_ ) );
+ if( padded_edsize_ > edsize_ ) // set padding to zero
+ std::memset( buf + edsize_, 0, padded_edsize_ - edsize_ );
+ crc_present_ = true;
+ return bufsize;
+ }
+
+
+const char * Extended::full_size_error() const
+ {
+ const char * const eferec_msg = "Error formatting extended records.";
+ switch( full_size_ )
+ {
+ case -1: return eferec_msg;
+ case -2: return mem_msg2;
+ case -3: return longrec_msg;
+ default: internal_error( "invalid call to full_size_error." );
+ return 0; // keep compiler quiet
+ }
+ }
+
+
+/* Parse the 'edsize' bytes of extended records at 'buf' and store the
+ values of the known keywords in this object.
+ Each record has the form "size keyword=value\n", where 'size' is the
+ decimal length of the whole record.
+ Return false if a record is malformed, if a value is invalid or fits in
+ the ustar header (size, uid, gid), if the CRC does not match, or, unless
+ 'permissive' is true, if a keyword appears more than once.
+ Unknown keywords other than "comment" are reported (if verbosity >= 1)
+ and ignored. */
+bool Extended::parse( const char * const buf, const int edsize,
+ const bool permissive )
+ {
+ reset(); full_size_ = -4; // invalidate cached sizes
+ for( int pos = 0; pos < edsize; ) // parse records
+ {
+ const char * tail;
+ // the record size is limited to the bytes remaining in the block
+ const int rsize =
+ parse_decimal( buf + pos, &tail, edsize - pos, edsize - pos );
+ if( rsize <= 0 || tail[0] != ' ' || buf[pos+rsize-1] != '\n' ) return false;
+ ++tail; // point to keyword
+ // rest = length of (keyword + '=' + value) without the final newline
+ const int rest = ( buf + ( pos + rsize - 1 ) ) - tail;
+ // every branch requires 'rest' to be larger than the length of
+ // "keyword=", i.e., the value must not be empty
+ if( rest > 5 && std::memcmp( tail, "path=", 5 ) == 0 )
+ {
+ if( path_.size() && !permissive ) return false;
+ int len = rest - 5;
+ while( len > 1 && tail[5+len-1] == '/' ) --len; // trailing '/'
+ path_.assign( tail + 5, len );
+ // this also truncates path_ at the first embedded null character
+ path_.assign( remove_leading_dotslash( path_.c_str(), &removed_prefix ) );
+ }
+ else if( rest > 9 && std::memcmp( tail, "linkpath=", 9 ) == 0 )
+ {
+ if( linkpath_.size() && !permissive ) return false;
+ int len = rest - 9;
+ while( len > 1 && tail[9+len-1] == '/' ) --len; // trailing '/'
+ linkpath_.assign( tail + 9, len );
+ }
+ else if( rest > 5 && std::memcmp( tail, "size=", 5 ) == 0 )
+ {
+ if( file_size_ != 0 && !permissive ) return false;
+ file_size_ = parse_decimal( tail + 5, &tail, rest - 5, max_file_size );
+ // overflow, parse error, or size fits in ustar header
+ // (the number must also extend up to the final newline of the record)
+ if( file_size_ < 1LL << 33 || tail != buf + ( pos + rsize - 1 ) )
+ return false;
+ }
+ else if( rest > 4 && std::memcmp( tail, "uid=", 4 ) == 0 )
+ {
+ if( uid_ >= 0 && !permissive ) return false;
+ uid_ = parse_decimal( tail + 4, &tail, rest - 4 );
+ // overflow, parse error, or uid fits in ustar header
+ if( uid_ < 1 << 21 || tail != buf + ( pos + rsize - 1 ) ) return false;
+ }
+ else if( rest > 4 && std::memcmp( tail, "gid=", 4 ) == 0 )
+ {
+ if( gid_ >= 0 && !permissive ) return false;
+ gid_ = parse_decimal( tail + 4, &tail, rest - 4 );
+ // overflow, parse error, or gid fits in ustar header
+ if( gid_ < 1 << 21 || tail != buf + ( pos + rsize - 1 ) ) return false;
+ }
+ else if( rest > 6 && std::memcmp( tail, "atime=", 6 ) == 0 )
+ {
+ if( atime_.isvalid() && !permissive ) return false;
+ if( !atime_.parse( tail + 6, &tail, rest - 6 ) || // parse error
+ tail != buf + ( pos + rsize - 1 ) ) return false;
+ }
+ else if( rest > 6 && std::memcmp( tail, "mtime=", 6 ) == 0 )
+ {
+ if( mtime_.isvalid() && !permissive ) return false;
+ if( !mtime_.parse( tail + 6, &tail, rest - 6 ) || // parse error
+ tail != buf + ( pos + rsize - 1 ) ) return false;
+ }
+ else if( rest > 10 && std::memcmp( tail, "GNU.crc32=", 10 ) == 0 )
+ {
+ if( crc_present_ && !permissive ) return false;
+ // the CRC record has a fixed size (that of crc_record)
+ if( rsize != (int)crc_record.size() ) return false;
+ crc_present_ = true;
+ const uint32_t stored_crc = parse_record_crc( tail + 10 );
+ // pos + rsize - 9 is the offset in buf of the 8 stored CRC digits
+ const uint32_t computed_crc =
+ crc32c.windowed_crc( (const uint8_t *)buf, pos + rsize - 9, edsize );
+ if( stored_crc != computed_crc )
+ {
+ if( verbosity >= 2 )
+ std::fprintf( stderr, "CRC32-C = %08X\n", (unsigned)computed_crc );
+ return false;
+ }
+ }
+ // "comment" records are ignored silently
+ else if( ( rest < 8 || std::memcmp( tail, "comment=", 8 ) != 0 ) &&
+ verbosity >= 1 ) unknown_keyword( tail, rest );
+ pos += rsize;
+ }
+ return true;
+ }
+
+
+/* If not already initialized, copy linkpath, path, file_size, uid, gid,
+ atime, and mtime from ustar header.
+ Values already set (for example by parse) are left untouched. */
+void Extended::fill_from_ustar( const Tar_header header )
+ {
+ if( linkpath_.empty() ) // copy linkpath from ustar header
+ {
+ int len = 0;
+ // the field may fill linkname_l bytes without a terminating null
+ while( len < linkname_l && header[linkname_o+len] ) ++len;
+ while( len > 1 && header[linkname_o+len-1] == '/' ) --len; // trailing '/'
+ if( len > 0 )
+ {
+ linkpath_.assign( (const char *)header + linkname_o, len );
+ full_size_ = -4; // invalidate cached sizes
+ }
+ }
+
+ if( path_.empty() ) // copy path from ustar header
+ { // the entire path may be in prefix
+ // room for prefix + '/' + name + terminating null
+ char stored_name[prefix_l+1+name_l+1];
+ int len = 0;
+ while( len < prefix_l && header[prefix_o+len] )
+ { stored_name[len] = header[prefix_o+len]; ++len; }
+ // join prefix and name with '/' only if both are present
+ if( len && header[name_o] ) stored_name[len++] = '/';
+ for( int i = 0; i < name_l && header[name_o+i]; ++i )
+ { stored_name[len] = header[name_o+i]; ++len; }
+ while( len > 0 && stored_name[len-1] == '/' ) --len; // trailing '/'
+ stored_name[len] = 0;
+ path( remove_leading_dotslash( stored_name, &removed_prefix ) );
+ }
+
+ const Typeflag typeflag = (Typeflag)header[typeflag_o];
+ if( file_size_ == 0 && // copy file_size from ustar header
+ ( typeflag == tf_regular || typeflag == tf_hiperf ) )
+ file_size( parse_octal( header + size_o, size_l ) );
+ if( uid_ < 0 ) uid_ = parse_octal( header + uid_o, uid_l );
+ if( gid_ < 0 ) gid_ = parse_octal( header + gid_o, gid_l );
+ // atime is initialized from the mtime field of the header
+ // NOTE(review): presumably because the ustar header has no atime field
+ if( !atime_.isvalid() )
+ atime_.set( parse_octal( header + mtime_o, mtime_l ) ); // 33 bits
+ if( !mtime_.isvalid() )
+ mtime_.set( parse_octal( header + mtime_o, mtime_l ) ); // 33 bits
+ }
+
+
+/* Return the file size of the member described by 'header', taken from the
+   extended record if one was parsed, else from the ustar header.
+   Return 0 for any member that is not a regular or high performance file.
+   The stored file size is reset to 0 in all cases.
+   Used for fast parsing of headers in uncompressed archives. */
+long long Extended::get_file_size_and_reset( const Tar_header header )
+  {
+  const long long saved_size = file_size_;
+  file_size( 0 );				// reset full_size_
+  const Typeflag typeflag = (Typeflag)header[typeflag_o];
+  if( typeflag == tf_regular || typeflag == tf_hiperf )
+    {
+    if( saved_size <= 0 ) return parse_octal( header + size_o, size_l );
+    return saved_size;
+    }
+  return 0;
+  }
diff --git a/lzip_index.cc b/lzip_index.cc
new file mode 100644
index 0000000..bcdc54f
--- /dev/null
+++ b/lzip_index.cc
@@ -0,0 +1,210 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdio>
+#include <unistd.h>
+
+#include "tarlz.h"
+#include "lzip_index.h"
+
+
+/* Seek 'fd' to the absolute position 'pos' and read up to 'size' bytes into
+   'buf'. Return the value returned by readblock, or 0 if the seek fails. */
+int seek_read( const int fd, uint8_t * const buf, const int size,
+               const long long pos )
+  {
+  const bool positioned = ( lseek( fd, pos, SEEK_SET ) == pos );
+  if( !positioned ) return 0;
+  return readblock( fd, buf, size );
+  }
+
+namespace {
+
+/* Return a message saying that member format 'version' is not supported.
+   The message is kept in a static buffer that is overwritten by the next
+   call. */
+const char * bad_version( const unsigned version )
+  {
+  enum { msg_size = 80 };
+  static char msg[msg_size];
+  snprintf( msg, msg_size, "Version %u member format not supported.",
+            version );
+  return msg;
+  }
+
+} // end namespace
+
+
+/* Check the magic string, the version, and the dictionary size of 'header',
+   in that order. Return true if all of them are valid. Else set error_ to
+   the message of the first failed check, set retval_ to 2, and return false.
+   A bad magic string is also recorded in bad_magic_ if 'first' is true. */
+bool Lzip_index::check_header( const Lzip_header & header, const bool first )
+  {
+  if( header.check_magic() )
+    {
+    if( header.check_version() )
+      {
+      if( isvalid_ds( header.dictionary_size() ) ) return true;
+      error_ = bad_dict_msg;
+      }
+    else error_ = bad_version( header.version() );
+    }
+  else
+    {
+    error_ = bad_magic_msg;
+    if( first ) bad_magic_ = true;
+    }
+  retval_ = 2;
+  return false;
+  }
+
+// Set error_ to 'msg' followed by the description of the current value of
+// errno, and set retval_ to 1.
+void Lzip_index::set_errno_error( const char * const msg )
+  {
+  error_.assign( msg );
+  error_.append( std::strerror( errno ) );
+  retval_ = 1;
+  }
+
+// Set error_ to 'msg' followed by 'num' in decimal (truncated to 79
+// characters), and set retval_ to 2.
+void Lzip_index::set_num_error( const char * const msg, unsigned long long num )
+  {
+  char text[80];
+  snprintf( text, sizeof text, "%s%llu", msg, num );
+  retval_ = 2;
+  error_.assign( text );
+  }
+
+
+/* Read the member header located at position 'pos' of 'fd' into 'header'.
+   Return true if the whole header was read. Else record the error with
+   set_errno_error and return false. */
+bool Lzip_index::read_header( const int fd, Lzip_header & header,
+                              const long long pos )
+  {
+  const int rd = seek_read( fd, header.data, header.size, pos );
+  if( rd == header.size ) return true;
+  set_errno_error( "Error reading member header: " );
+  return false;
+  }
+
+
+/* Search backwards from 'pos' for the trailer of the last member, skipping
+ any data that follows it. The file is scanned in blocks of block_size
+ bytes, from the end towards the beginning.
+ If successful, push last member and set pos to member header.
+ Return false, with error_ and retval_ set in most cases, if no valid
+ member is found or if the last member is truncated or corrupt. */
+bool Lzip_index::skip_trailing_data( const int fd, unsigned long long & pos )
+ {
+ if( pos < min_member_size ) return false;
+ // the buffer holds one block plus room for a trailer and a header that
+ // straddle the boundary with the block read in the previous iteration
+ enum { block_size = 16384,
+ buffer_size = block_size + Lzip_trailer::size - 1 + Lzip_header::size };
+ uint8_t buffer[buffer_size];
+ int bsize = pos % block_size; // total bytes in buffer
+ // if the last partial block is short, read the preceding block with it
+ if( bsize <= buffer_size - block_size ) bsize += block_size;
+ int search_size = bsize; // bytes to search for trailer
+ int rd_size = bsize; // bytes to read from file
+ unsigned long long ipos = pos - rd_size; // aligned to block_size
+
+ while( true )
+ {
+ if( seek_read( fd, buffer, rd_size, ipos ) != rd_size )
+ { set_errno_error( "Error seeking member trailer: " ); return false; }
+ // a member ending at or before ipos + search_size can't have a size
+ // whose most significant byte is larger than this
+ const uint8_t max_msb = ( ipos + search_size ) >> 56;
+ // try every position i in the buffer as the end of a trailer
+ for( int i = search_size; i >= Lzip_trailer::size; --i )
+ if( buffer[i-1] <= max_msb ) // most significant byte of member_size
+ {
+ const Lzip_trailer & trailer =
+ *(const Lzip_trailer *)( buffer + i - trailer.size );
+ const unsigned long long member_size = trailer.member_size();
+ if( member_size == 0 ) // skip trailing zeros
+ { while( i > trailer.size && buffer[i-9] == 0 ) --i; continue; }
+ if( member_size > ipos + i || !trailer.check_consistency() ) continue;
+ // the candidate trailer is plausible; check the header it implies
+ Lzip_header header;
+ if( !read_header( fd, header, ipos + i - member_size ) ) return false;
+ if( !header.check() ) continue;
+ // examine the bytes following the trailer: they must not look like
+ // the beginning of another (truncated or corrupt) member
+ const Lzip_header & header2 = *(const Lzip_header *)( buffer + i );
+ const bool full_h2 = bsize - i >= header.size;
+ if( header2.check_prefix( bsize - i ) ) // last member
+ {
+ if( !full_h2 ) error_ = "Last member in input file is truncated.";
+ else if( check_header( header2, false ) )
+ error_ = "Last member in input file is truncated or corrupt.";
+ retval_ = 2; return false;
+ }
+ if( full_h2 && header2.check_corrupt() )
+ { error_ = corrupt_mm_msg; retval_ = 2; return false; }
+ pos = ipos + i - member_size; // good member
+ const unsigned dictionary_size = header.dictionary_size();
+ if( dictionary_size_ < dictionary_size )
+ dictionary_size_ = dictionary_size;
+ // the data position (0 here) is computed later by the constructor
+ member_vector.push_back( Member( 0, trailer.data_size(), pos,
+ member_size, dictionary_size ) );
+ return true;
+ }
+ // no trailer found in this buffer
+ if( ipos == 0 )
+ { set_num_error( "Bad trailer at pos ", pos - Lzip_trailer::size );
+ return false; }
+ // move to the preceding block, keeping the beginning of the current
+ // buffer after it so that trailers crossing the boundary can be found
+ bsize = buffer_size;
+ search_size = bsize - Lzip_header::size;
+ rd_size = block_size;
+ ipos -= rd_size;
+ std::memcpy( buffer + rd_size, buffer, buffer_size - rd_size );
+ }
+ }
+
+
+/* Build the index of the members of the file open at 'infd'.
+ The members are found by walking the file backwards from its end: each
+ trailer gives the size of its member, which locates the header of the
+ member and the trailer of the preceding member.
+ On failure error_ and retval_ (1 or 2) are set and member_vector is
+ normally left empty; on success retval_ stays 0. */
+Lzip_index::Lzip_index( const int infd )
+ : insize( lseek( infd, 0, SEEK_END ) ), retval_( 0 ), dictionary_size_( 0 ),
+ bad_magic_( false )
+ {
+ if( insize < 0 )
+ { set_errno_error( "Input file is not seekable: " ); return; }
+ if( insize < min_member_size )
+ { error_ = "Input file is too short."; retval_ = 2; return; }
+ // NOTE(review): insize is declared as long long, so this test can't be
+ // true where long long has 64 bits
+ if( insize > INT64_MAX )
+ { error_ = "Input file is too long (2^63 bytes or more).";
+ retval_ = 2; return; }
+
+ // the file must begin with a valid member header
+ Lzip_header header;
+ if( !read_header( infd, header, 0 ) ||
+ !check_header( header, true ) ) return;
+
+ unsigned long long pos = insize; // always points to a header or to EOF
+ while( pos >= min_member_size )
+ {
+ Lzip_trailer trailer;
+ if( seek_read( infd, trailer.data, trailer.size, pos - trailer.size ) !=
+ trailer.size )
+ { set_errno_error( "Error reading member trailer: " ); break; }
+ const unsigned long long member_size = trailer.member_size();
+ if( member_size > pos || !trailer.check_consistency() ) // bad trailer
+ {
+ // before the first member is found, a bad trailer may just mean that
+ // there is trailing data after the last member
+ if( member_vector.empty() )
+ { if( skip_trailing_data( infd, pos ) ) continue; return; }
+ set_num_error( "Bad trailer at pos ", pos - trailer.size ); break;
+ }
+ if( !read_header( infd, header, pos - member_size ) ) break;
+ if( !header.check() ) // bad header
+ {
+ if( member_vector.empty() )
+ { if( skip_trailing_data( infd, pos ) ) continue; return; }
+ set_num_error( "Bad header at pos ", pos - member_size ); break;
+ }
+ pos -= member_size; // good member
+ const unsigned dictionary_size = header.dictionary_size();
+ if( dictionary_size_ < dictionary_size )
+ dictionary_size_ = dictionary_size;
+ // the data position (0 here) is filled in after the loop
+ member_vector.push_back( Member( 0, trailer.data_size(), pos,
+ member_size, dictionary_size ) );
+ }
+ // the walk must have reached the beginning of the file without errors
+ if( pos != 0 || member_vector.empty() || retval_ != 0 )
+ {
+ member_vector.clear();
+ if( retval_ == 0 ) { error_ = "Can't create file index."; retval_ = 2; }
+ return;
+ }
+ // members were pushed from last to first; put them in file order
+ std::reverse( member_vector.begin(), member_vector.end() );
+ // set the data position of each member to the end of the data of the
+ // preceding member, checking that the accumulated size stays in range
+ for( unsigned long i = 0; ; ++i )
+ {
+ const long long end = member_vector[i].dblock.end();
+ if( end < 0 || end > INT64_MAX )
+ {
+ member_vector.clear();
+ error_ = "Data in input file is too long (2^63 bytes or more).";
+ retval_ = 2; return;
+ }
+ if( i + 1 >= member_vector.size() ) break;
+ member_vector[i+1].dblock.pos( end );
+ }
+ }
diff --git a/lzip_index.h b/lzip_index.h
new file mode 100644
index 0000000..822f537
--- /dev/null
+++ b/lzip_index.h
@@ -0,0 +1,93 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef INT64_MAX	// fallback for headers that do not define INT64_MAX
+#define INT64_MAX 0x7FFFFFFFFFFFFFFFLL	// 2^63 - 1
+#endif
+
+
+// A range of bytes described by its starting position and its size.
+class Block
+  {
+  // Expected by the users of this class (not enforced here):
+  // pos_ >= 0, size_ >= 0, pos_ + size_ <= INT64_MAX
+  long long pos_;
+  long long size_;
+
+public:
+  Block( const long long p, const long long s )
+    : pos_( p ), size_( s ) {}
+
+  // read access
+  long long pos() const { return pos_; }
+  long long size() const { return size_; }
+  long long end() const { return pos() + size(); }	// first position past the block
+
+  // write access
+  void pos( const long long p ) { pos_ = p; }
+  void size( const long long s ) { size_ = s; }
+  };
+
+
+// Index of the members found in a lzip file, built by the constructor.
+class Lzip_index
+  {
+  // One member of the file: its data block, its member block, and the
+  // dictionary size read from its header.
+  struct Member
+    {
+    Block dblock, mblock;		// data block, member block
+    unsigned dictionary_size;
+
+    Member( const long long dpos, const long long dsize,
+            const long long mpos, const long long msize,
+            const unsigned dict_size )
+      : dblock( dpos, dsize ), mblock( mpos, msize ),
+        dictionary_size( dict_size ) {}
+    };
+
+  std::vector< Member > member_vector;
+  std::string error_;
+  const long long insize;
+  int retval_;
+  unsigned dictionary_size_;	// largest dictionary size in the file
+  bool bad_magic_;		// bad magic in first header
+
+  bool check_header( const Lzip_header & header, const bool first );
+  void set_errno_error( const char * const msg );
+  void set_num_error( const char * const msg, unsigned long long num );
+  bool read_header( const int fd, Lzip_header & header, const long long pos );
+  bool skip_trailing_data( const int fd, unsigned long long & pos );
+
+public:
+  Lzip_index( const int infd );
+
+  // status of the index
+  long members() const { return member_vector.size(); }
+  const std::string & error() const { return error_; }
+  int retval() const { return retval_; }
+  unsigned dictionary_size() const { return dictionary_size_; }
+  bool bad_magic() const { return bad_magic_; }
+
+  // end of the data block of the last member, or 0 if there are no members
+  long long udata_size() const
+    { return member_vector.empty() ? 0 : member_vector.back().dblock.end(); }
+
+  // end of the member block of the last member, or 0 if there are no members
+  long long cdata_size() const
+    { return member_vector.empty() ? 0 : member_vector.back().mblock.end(); }
+
+  // total size including trailing data (if any)
+  long long file_size() const
+    { return ( insize >= 0 ) ? insize : 0; }
+
+  // per-member access; 'i' is not range-checked
+  const Block & dblock( const long i ) const
+    { return member_vector[i].dblock; }
+  const Block & mblock( const long i ) const
+    { return member_vector[i].mblock; }
+  unsigned dictionary_size( const long i ) const
+    { return member_vector[i].dictionary_size; }
+  };
diff --git a/main.cc b/main.cc
new file mode 100644
index 0000000..db37f76
--- /dev/null
+++ b/main.cc
@@ -0,0 +1,720 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+/*
+ Exit status: 0 for a normal exit, 1 for environmental problems
+ (file not found, files differ, invalid command-line options, I/O errors,
+ etc), 2 to indicate a corrupt or invalid input file, 3 for an internal
+ consistency error (e.g., bug) which caused tarlz to panic.
+*/
+
+#define _FILE_OFFSET_BITS 64
+
+#include <cctype>
+#include <cerrno>
+#include <cstdarg>
+#include <cstdio>
+#include <ctime>
+#include <fcntl.h>
+#include <pthread.h> // for pthread_t
+#include <stdint.h> // for lzlib.h
+#include <unistd.h>
+#include <sys/stat.h>
+#include <grp.h>
+#include <pwd.h>
+#include <lzlib.h>
+#if defined __OS2__
+#include <io.h>
+#endif
+
+#include "tarlz.h"
+#include "arg_parser.h"
+
+#ifndef O_BINARY	// let O_BINARY be or'ed into open flags where it is not defined
+#define O_BINARY 0
+#endif
+
+#if CHAR_BIT != 8	// refuse to compile where bytes are not 8 bits wide
+#error "Environments where CHAR_BIT != 8 are not supported."
+#endif
+
+int verbosity = 0;	// set to -1 by '-q', incremented up to 4 by '-v' (see main)
+const char * const program_name = "tarlz";	// prefix of diagnostic messages
+
+namespace {
+
+const char * const program_year = "2024";	// year shown by show_version
+const char * invocation_name = program_name; // default value; main replaces it with argv[0]
+
+
+void show_help( const long num_online )	// print the help text to stdout; num_online is shown as the default of '--threads'
+ {
+ std::printf( "Tarlz is a massively parallel (multi-threaded) combined implementation of\n"
+ "the tar archiver and the lzip compressor. Tarlz uses the compression library\n"
+ "lzlib.\n"
+ "\nTarlz creates tar archives using a simplified and safer variant of the POSIX\n"
+ "pax format compressed in lzip format, keeping the alignment between tar\n"
+ "members and lzip members. The resulting multimember tar.lz archive is\n"
+ "backward compatible with standard tar tools like GNU tar, which treat it\n"
+ "like any other tar.lz archive. Tarlz can append files to the end of such\n"
+ "compressed archives.\n"
+ "\nKeeping the alignment between tar members and lzip members has two\n"
+ "advantages. It adds an indexed lzip layer on top of the tar archive, making\n"
+ "it possible to decode the archive safely in parallel. It also minimizes the\n"
+ "amount of data lost in case of corruption.\n"
+ "\nThe tarlz file format is a safe POSIX-style backup format. In case of\n"
+ "corruption, tarlz can extract all the undamaged members from the tar.lz\n"
+ "archive, skipping over the damaged members, just like the standard\n"
+ "(uncompressed) tar. Moreover, the option '--keep-damaged' can be used to\n"
+ "recover as much data as possible from each damaged member, and lziprecover\n"
+ "can be used to recover some of the damaged members.\n"
+ "\nUsage: %s operation [options] [files]\n", invocation_name );
+ std::printf( "\nOperations:\n"
+ " --help display this help and exit\n"
+ " -V, --version output version information and exit\n"
+ " -A, --concatenate append archives to the end of an archive\n"
+ " -c, --create create a new archive\n"
+ " -d, --diff find differences between archive and file system\n"
+ " --delete delete files/directories from an archive\n"
+ " -r, --append append files to the end of an archive\n"
+ " -t, --list list the contents of an archive\n"
+ " -x, --extract extract files/directories from an archive\n"
+ " -z, --compress compress existing POSIX tar archives\n"
+ " --check-lib check version of lzlib and exit\n"
+ "\nOptions:\n"
+ " -B, --data-size=<bytes> set target size of input data blocks [2x8=16 MiB]\n"
+ " -C, --directory=<dir> change to directory <dir>\n"
+ " -f, --file=<archive> use archive file <archive>\n"
+ " -h, --dereference follow symlinks; archive the files they point to\n"
+ " -n, --threads=<n> set number of (de)compression threads [%ld]\n"
+ " -o, --output=<file> compress to <file> ('-' for stdout)\n"
+ " -p, --preserve-permissions don't subtract the umask on extraction\n"
+ " -q, --quiet suppress all messages\n"
+ " -v, --verbose verbosely list files processed\n"
+ " -0 .. -9 set compression level [default 6]\n"
+ " --uncompressed don't compress the archive created\n"
+ " --asolid create solidly compressed appendable archive\n"
+ " --bsolid create per block compressed archive (default)\n"
+ " --dsolid create per directory compressed archive\n"
+ " --no-solid create per file compressed archive\n"
+ " --solid create solidly compressed archive\n"
+ " --anonymous equivalent to '--owner=root --group=root'\n"
+ " --owner=<owner> use <owner> name/ID for files added to archive\n"
+ " --group=<group> use <group> name/ID for files added to archive\n"
+ " --exclude=<pattern> exclude files matching a shell pattern\n"
+ " --ignore-ids ignore differences in owner and group IDs\n"
+ " --ignore-metadata compare only file size and file content\n"
+ " --ignore-overflow ignore mtime overflow differences on 32-bit\n"
+ " --keep-damaged don't delete partially extracted files\n"
+ " --missing-crc exit with error status if missing extended CRC\n"
+ " --mtime=<date> use <date> as mtime for files added to archive\n"
+ " --out-slots=<n> number of 1 MiB output packets buffered [64]\n"
+ " --warn-newer warn if any file is newer than the archive\n"
+/* " --permissive allow repeated extended headers and records\n"*/,
+ num_online );
+ if( verbosity >= 1 )	// '--debug' is listed only when at least one '-v' precedes '--help'
+ {
+ std::printf( " --debug=<level> (0-1) print debug statistics to stderr\n" );
+ }
+ std::printf( "\nIf no archive is specified, tarlz tries to read it from standard input or\n"
+ "write it to standard output.\n"
+ "\nExit status: 0 for a normal exit, 1 for environmental problems\n"
+ "(file not found, files differ, invalid command-line options, I/O errors,\n"
+ "etc), 2 to indicate a corrupt or invalid input file, 3 for an internal\n"
+ "consistency error (e.g., bug) which caused tarlz to panic.\n"
+ "\nReport bugs to lzip-bug@nongnu.org\n"
+ "Tarlz home page: http://www.nongnu.org/lzip/tarlz.html\n" );
+ }
+
+
+// Print program version, copyright, lzlib version, and license to stdout.
+void show_version()
+  {
+  const char * const lib_version = LZ_version();
+
+  std::printf( "%s %s\n", program_name, PROGVERSION );
+  std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
+  std::printf( "Using lzlib %s\n", lib_version );
+  // constant text; written without format processing
+  std::fputs( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n"
+              "This is free software: you are free to change and redistribute it.\n"
+              "There is NO WARRANTY, to the extent permitted by law.\n", stdout );
+  }
+
+
+int check_lzlib_ver() // accepts <major>.<minor> or <major>.<minor>[a-z.-]*; returns 0 if OK, 2 if malformed or mismatched
+ {
+#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012	// the check is compiled out otherwise
+ const unsigned char * p = (unsigned char *)LZ_version_string;
+ unsigned major = 0, minor = 0;
+ while( major < 100000 && isdigit( *p ) )	// parse <major>; stops accumulating once major >= 100000
+ { major *= 10; major += *p - '0'; ++p; }
+ if( *p == '.' ) ++p;	// the dot between <major> and <minor> is mandatory
+ else
+out: { show_error( "Invalid LZ_version_string in lzlib.h" ); return 2; }	// also reached by the goto below
+ while( minor < 100 && isdigit( *p ) )	// parse <minor>; stops accumulating once minor >= 100
+ { minor *= 10; minor += *p - '0'; ++p; }
+ if( *p && *p != '-' && *p != '.' && !std::islower( *p ) ) goto out;	// only '-', '.', or a lowercase letter may follow
+ const unsigned version = major * 1000 + minor;	// same encoding as LZ_API_VERSION (1.12 -> 1012)
+ if( LZ_API_VERSION != version )
+ {
+ if( verbosity >= 0 )
+ std::fprintf( stderr, "%s: Version mismatch in lzlib.h: "
+ "LZ_API_VERSION = %u, should be %u.\n",
+ program_name, LZ_API_VERSION, version );
+ return 2;
+ }
+#endif
+ return 0;	// also the result when the check above is compiled out
+ }
+
+
+/* Compare the lzlib header used at compile time with the library in use at
+   run time, printing a warning to stdout for each difference found.
+   Returns the result of check_lzlib_ver(), passed through
+   set_retval( retval, 1 ) once per difference. */
+int check_lib()
+  {
+  int retval = check_lzlib_ver();
+  const char * const lib_version = LZ_version();
+  const bool warn = ( verbosity >= 0 );
+
+  if( std::strcmp( LZ_version_string, lib_version ) != 0 )
+    {
+    set_retval( retval, 1 );
+    if( warn )
+      std::printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n",
+                   LZ_version_string, lib_version );
+    }
+#if defined LZ_API_VERSION && LZ_API_VERSION >= 1012
+  if( LZ_API_VERSION != LZ_api_version() )
+    {
+    set_retval( retval, 1 );
+    if( warn )
+      std::printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n",
+                   LZ_API_VERSION, LZ_api_version() );
+    }
+#endif
+  if( verbosity < 1 ) return retval;		// details only in verbose mode
+
+  std::printf( "Using lzlib %s\n", lib_version );
+#if !defined LZ_API_VERSION
+  std::fputs( "LZ_API_VERSION is not defined.\n", stdout );
+#elif LZ_API_VERSION >= 1012
+  std::printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() );
+#else
+  std::printf( "Compiled with LZ_API_VERSION = %u. "
+               "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION );
+#endif
+  return retval;
+  }
+
+
+/* Format 'num' as text. Values beyond +-1024 that are exact multiples of
+   1024 (tried first) or of 1000 are reduced and given a binary ("Ki", "Mi",
+   ...) or SI ("k", "M", ...) suffix. If the value left has 5 or more digits,
+   the digits are separated in groups of 3 using '_'.
+   The result lives in one of 8 static buffers used in rotation, so up to 8
+   results can be in use at once (e.g., in a single printf call). */
+const char * format_num3( long long num )
+  {
+  enum { buffers = 8, bufsize = 4 * sizeof num, n = 10 };
+  static const char si_prefix[] = "kMGTPEZYRQ";
+  static const char binary_prefix[] = "KMGTPEZYRQ";
+  static char buffer[buffers][bufsize];	// circle of static buffers for printf
+  static int current = 0;
+
+  char * const slot = buffer[current];
+  current = ( current + 1 ) % buffers;
+  char * out = slot + bufsize - 1;		// the text is built backwards
+  *out = 0;					// terminator
+  const bool negative = ( num < 0 );
+
+  if( num > 1024 || num < -1024 )
+    {
+    char unit = 0;				// try binary first, then si
+    int steps = 0;
+    while( steps < n && num != 0 && num % 1024 == 0 )
+      { num /= 1024; unit = binary_prefix[steps]; ++steps; }
+    if( unit ) *(--out) = 'i';
+    else
+      {
+      steps = 0;
+      while( steps < n && num != 0 && num % 1000 == 0 )
+        { num /= 1000; unit = si_prefix[steps]; ++steps; }
+      }
+    if( unit ) *(--out) = unit;
+    }
+
+  const bool grouped = ( num >= 10000 || num <= -10000 );
+  int in_group = 0;				// digits written in the current group
+  do {
+    const long long before = num;
+    num /= 10;
+    const long long digit = before - ( 10 * num );	// -9 .. 9
+    *(--out) = ( ( digit < 0 ) ? -digit : digit ) + '0';
+    if( num != 0 && grouped && ++in_group >= 3 )
+      { in_group = 0; *(--out) = '_'; }
+    }
+  while( num != 0 );
+
+  if( negative ) *(--out) = '-';
+  return out;
+  }
+
+
+// Print to stderr a diagnostic about argument 'arg' of option 'option_name',
+// unless messages are suppressed (verbosity < 0).
+void show_option_error( const char * const arg, const char * const msg,
+                        const char * const option_name )
+  {
+  if( verbosity < 0 ) return;
+  std::fprintf( stderr, "%s: '%s': %s option '%s'.\n",
+                program_name, arg, msg, option_name );
+  }
+
+
+/* Parse the numerical argument 'arg' of option 'option_name'.
+   Recognized formats: <num>k, <num>Ki, <num>[MGTPEZYRQ][i]
+   Prints a diagnostic and exits with status 1 if 'arg' does not start with a
+   number, has a bad multiplier, overflows, or is outside [llimit, ulimit]. */
+long long getnum( const char * const arg, const char * const option_name,
+                  const long long llimit = LLONG_MIN,
+                  const long long ulimit = LLONG_MAX )
+  {
+  char * tail;
+  errno = 0;
+  long long result = strtoll( arg, &tail, 0 );
+  if( tail == arg )
+    {
+    show_option_error( arg, "Bad or missing numerical argument in",
+                       option_name );
+    std::exit( 1 );
+    }
+
+  if( errno == 0 && tail[0] != 0 )		// a multiplier follows the number
+    {
+    const bool binary = ( tail[1] == 'i' );
+    const int factor = binary ? 1024 : 1000;
+    int exponent;				// 0 = bad multiplier
+    switch( tail[0] )
+      {
+      case 'k': exponent = binary ? 0 : 1; break;	// 'k' only as 1000
+      case 'K': exponent = binary ? 1 : 0; break;	// 'K' only as "Ki"
+      case 'M': exponent = 2; break;
+      case 'G': exponent = 3; break;
+      case 'T': exponent = 4; break;
+      case 'P': exponent = 5; break;
+      case 'E': exponent = 6; break;
+      case 'Z': exponent = 7; break;
+      case 'Y': exponent = 8; break;
+      case 'R': exponent = 9; break;
+      case 'Q': exponent = 10; break;
+      default: exponent = 0;
+      }
+    if( exponent <= 0 )
+      {
+      show_option_error( arg, "Bad multiplier in numerical argument of",
+                         option_name );
+      std::exit( 1 );
+      }
+    for( int remaining = exponent; remaining > 0; --remaining )
+      {
+      // multiply only if the product is representable
+      const bool fits = ( result >= 0 ) ? ( LLONG_MAX / factor >= result ) :
+                                          ( LLONG_MIN / factor <= result );
+      if( !fits ) { errno = ERANGE; break; }
+      result *= factor;
+      }
+    }
+  if( errno == 0 && ( result < llimit || result > ulimit ) ) errno = ERANGE;
+  if( errno == 0 ) return result;
+
+  if( verbosity >= 0 )
+    std::fprintf( stderr, "%s: '%s': Value out of limits [%s,%s] in "
+                  "option '%s'.\n", program_name, arg, format_num3( llimit ),
+                  format_num3( ulimit ), option_name );
+  std::exit( 1 );
+  }
+
+
+/* Record the archive name given in the command line. The first call stores
+   'new_name' in 'archive_name' (unless it is "-", which leaves 'archive_name'
+   unchanged). Any later call reports an error and exits with status 1. */
+void set_archive_name( std::string & archive_name, const std::string & new_name )
+  {
+  static bool first_call = true;
+
+  if( !first_call )
+    {
+    show_error( "Only one archive can be specified.", 0, true );
+    std::exit( 1 );
+    }
+  first_call = false;
+  if( new_name != "-" ) archive_name = new_name;
+  }
+
+
+/* Select the operation to perform. Repeating the same operation is allowed;
+   asking for a different one reports an error and exits with status 1. */
+void set_mode( Program_mode & program_mode, const Program_mode new_mode )
+  {
+  if( program_mode == m_none || program_mode == new_mode )
+    { program_mode = new_mode; return; }
+  show_error( "Only one operation can be specified.", 0, true );
+  std::exit( 1 );
+  }
+
+
+/* Parse the argument of option 'pn' as a modification time.
+   The time is returned as 'long long' even if time_t is 32-bit.
+   Accepted forms of 'arg':
+     '@<seconds>'             seconds since the epoch
+     name starting with '.' or '/'   mtime of that reference file
+     '[-]YYYY-MM-DD[[[<separator>HH]:MM]:SS]'   separator is ' ' or 'T'
+   Prints a diagnostic and exits with status 1 on any error. */
+long long parse_mtime( const char * arg, const char * const pn )
+  {
+  if( *arg == '@' ) return getnum( arg + 1, pn );
+
+  if( *arg == '.' || *arg == '/' )		// reference file
+    {
+    struct stat st;
+    if( stat( arg, &st ) != 0 )
+      {
+      show_file_error( arg, "Can't stat mtime reference file", errno );
+      std::exit( 1 );
+      }
+    return st.st_mtime;
+    }
+
+  long long y;		// long long because 2147483648-01-01 overflows int
+  unsigned mo, d, h, m, s;
+  char sep;
+  const int n = std::sscanf( arg, "%lld-%u-%u%c%u:%u:%u",
+                             &y, &mo, &d, &sep, &h, &m, &s );
+  const bool date_only = ( n == 3 );
+  // 'sep' is examined only when it has been read (n >= 5 implies n >= 4)
+  const bool date_time = ( n >= 5 && n <= 7 && ( sep == ' ' || sep == 'T' ) );
+  if( !date_only && !date_time )
+    { show_option_error( arg, "Unknown date format in", pn ); std::exit( 1 ); }
+
+  if( y >= INT_MIN + 1900 && y <= INT_MAX && mo >= 1 && mo <= 12 )
+    {
+    struct tm t;
+    t.tm_year = y - 1900; t.tm_mon = mo - 1; t.tm_mday = d;
+    t.tm_hour = ( n >= 5 ) ? h : 0;		// fields not given default to 0
+    t.tm_min = ( n >= 6 ) ? m : 0;
+    t.tm_sec = ( n >= 7 ) ? s : 0;
+    t.tm_isdst = -1;
+    errno = 0;
+    const long long mtime = std::mktime( &t );
+    if( mtime != -1 || errno == 0 ) return mtime;	// valid datetime
+    }
+  show_option_error( arg, "Date out of limits in", pn ); std::exit( 1 );
+  }
+
+
+/* Translate the owner given in 'arg' into a numeric ID. Tried in order: a
+   user name known to getpwnam, a non-negative number, the literal name
+   "root" (ID 0). Exits with status 1 if none of them applies. */
+long long parse_owner( const char * const arg, const char * const pn )
+  {
+  const struct passwd * const entry = getpwnam( arg );
+  if( entry != 0 ) return entry->pw_uid;
+
+  const bool numeric = ( std::isdigit( (unsigned char)arg[0] ) != 0 );
+  if( numeric ) return getnum( arg, pn, 0, LLONG_MAX );
+
+  if( std::strcmp( arg, "root" ) != 0 )
+    { show_option_error( arg, "Invalid owner in", pn ); std::exit( 1 ); }
+  return 0;
+  }
+
+/* Translate the group given in 'arg' into a numeric ID. Tried in order: a
+   group name known to getgrnam, a non-negative number, the literal name
+   "root" (ID 0). Exits with status 1 if none of them applies. */
+long long parse_group( const char * const arg, const char * const pn )
+  {
+  const struct group * const entry = getgrnam( arg );
+  if( entry != 0 ) return entry->gr_gid;
+
+  const bool numeric = ( std::isdigit( (unsigned char)arg[0] ) != 0 );
+  if( numeric ) return getnum( arg, pn, 0, LLONG_MAX );
+
+  if( std::strcmp( arg, "root" ) != 0 )
+    { show_option_error( arg, "Invalid group in", pn ); std::exit( 1 ); }
+  return 0;
+  }
+
+} // end namespace
+
+
+// Get the status of 'filename' with stat if 'dereference' is true, else with
+// lstat. Returns the result of the call used.
+int hstat( const char * const filename, struct stat * const st,
+           const bool dereference )
+  {
+  if( dereference ) return stat( filename, st );
+  return lstat( filename, st );
+  }
+
+
+// Return true if argument 'i' of 'parser' is a non-option (code 0) whose
+// text is not empty.
+bool nonempty_arg( const Arg_parser & parser, const int i )
+  {
+  if( parser.code( i ) != 0 ) return false;
+  return !parser.argument( i ).empty();
+  }
+
+
+/* Open 'name' for reading. Returns the file descriptor, or -1 (after
+   printing a diagnostic) if the file can't be opened or is a directory. */
+int open_instream( const std::string & name )
+  {
+  const char * const filename = name.c_str();
+  const int infd = open( filename, O_RDONLY | O_BINARY );
+  if( infd < 0 )
+    {
+    show_file_error( filename, "Can't open for reading", errno );
+    return -1;
+    }
+  struct stat st;				// infd must not be a directory
+  const bool is_dir = ( fstat( infd, &st ) == 0 && S_ISDIR( st.st_mode ) );
+  if( !is_dir ) return infd;
+  show_file_error( filename, "Can't read. Is a directory." );
+  close( infd );
+  return -1;
+  }
+
+
+int open_outstream( const std::string & name, const bool create,
+ Resizable_buffer * const rbufp, const bool force )
+ {
+ const int cflags = O_CREAT | O_WRONLY | ( force ? O_TRUNC : O_EXCL );
+ const int flags = ( create ? cflags : O_RDWR ) | O_BINARY;
+ const mode_t outfd_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+
+ const int outfd = open( name.c_str(), flags, outfd_mode );
+ if( outfd < 0 )
+ {
+ const char * msg = !create ? "Error opening file" :
+ ( ( errno == EEXIST ) ? "Skipping file" : "Can't create file" );
+ if( !rbufp ) show_file_error( name.c_str(), msg, errno );
+ else format_file_error( *rbufp, name.c_str(), msg, errno );
+ }
+ return outfd;
+ }
+
+
+/* Print 'msg' (if not empty) to stderr, followed by the description of
+   'errcode' if errcode > 0. If 'help' is true, also print a hint about
+   '--help'. Nothing is printed if verbosity < 0. */
+void show_error( const char * const msg, const int errcode, const bool help )
+  {
+  if( verbosity < 0 ) return;
+  if( msg && msg[0] )
+    {
+    const bool with_errno = ( errcode > 0 );
+    const char * const sep = with_errno ? ": " : "";
+    const char * const reason = with_errno ? std::strerror( errcode ) : "";
+    std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg, sep, reason );
+    }
+  if( !help ) return;
+  std::fprintf( stderr, "Try '%s --help' for more information.\n",
+                invocation_name );
+  }
+
+
+/* Store in 'rbuf' the null-terminated message
+   "<program_name>: <text formatted from 'format'>", followed by
+   ": <description of errcode>" if errcode > 0, and by a newline.
+   'rbuf' is enlarged and the message formatted again if it does not fit.
+   Returns false, leaving an empty string in 'rbuf', if verbosity < 0.
+   Returns true otherwise (the message may be truncated if 'rbuf' could not
+   be enlarged). */
+bool format_error( Resizable_buffer & rbuf, const int errcode,
+                   const char * const format, ... )
+  {
+  if( verbosity < 0 ) { rbuf.resize( 1 ); rbuf()[0] = 0; return false; }
+  va_list args;
+  for( int i = 0; i < 2; ++i )		// resize rbuf if not large enough
+    {
+    int len = snprintf( rbuf(), rbuf.size(), "%s: ", program_name );
+    if( len >= (int)rbuf.size() && !rbuf.resize( len + 1 ) ) break;
+    va_start( args, format );
+    len += vsnprintf( rbuf() + len, rbuf.size() - len, format, args );
+    va_end( args );
+    if( len >= (int)rbuf.size() && !rbuf.resize( len + 1 ) ) break;
+    // Append the newline with snprintf (as the errcode branch does) so that
+    // a terminating null byte always follows it. Storing '\n' directly over
+    // the terminator written by vsnprintf left the message unterminated.
+    if( errcode <= 0 )
+      len += snprintf( rbuf() + len, rbuf.size() - len, "\n" );
+    else len += snprintf( rbuf() + len, rbuf.size() - len, ": %s\n",
+                          std::strerror( errcode ) );
+    if( len < (int)rbuf.size() || !rbuf.resize( len + 1 ) ) break;
+    }
+  return true;
+  }
+
+
+/* Print to stderr "<program_name>: <text formatted from 'format'>", followed
+   by ": <description of errcode>" if errcode > 0, and by a newline.
+   Nothing is printed if verbosity < 0. */
+void print_error( const int errcode, const char * const format, ... )
+  {
+  if( verbosity < 0 ) return;
+  std::fprintf( stderr, "%s: ", program_name );
+
+  va_list ap;
+  va_start( ap, format );
+  std::vfprintf( stderr, format, ap );
+  va_end( ap );
+
+  if( errcode > 0 ) std::fprintf( stderr, ": %s\n", std::strerror( errcode ) );
+  else std::fputc( '\n', stderr );
+  }
+
+
+/* Append to 'estr' the line "<program_name>: <filename>: <msg>", followed by
+   ": <description of errcode>" if errcode > 0. 'estr' is left unchanged if
+   verbosity < 0. */
+void format_file_error( std::string & estr, const char * const filename,
+                        const char * const msg, const int errcode )
+  {
+  if( verbosity < 0 ) return;
+  estr.append( program_name ).append( ": " );
+  estr.append( filename ).append( ": " );
+  estr.append( msg );
+  if( errcode > 0 )
+    estr.append( ": " ).append( std::strerror( errcode ) );
+  estr.push_back( '\n' );
+  }
+
+/* Store in 'rbuf' the line "<program_name>: <filename>: <msg>", followed by
+   ": <description of errcode>" if errcode > 0. 'rbuf' is enlarged and the
+   line formatted again if it does not fit.
+   Returns false, leaving an empty string in 'rbuf', if verbosity < 0.
+   Returns true otherwise. */
+bool format_file_error( Resizable_buffer & rbuf, const char * const filename,
+                        const char * const msg, const int errcode )
+  {
+  if( verbosity < 0 ) { rbuf.resize( 1 ); rbuf()[0] = 0; return false; }
+  const bool with_errno = ( errcode > 0 );
+  int attempt = 0;
+  while( attempt < 2 )			// resize rbuf if not large enough
+    {
+    const int len = snprintf( rbuf(), rbuf.size(), "%s: %s: %s%s%s\n",
+                              program_name, filename, msg,
+                              with_errno ? ": " : "",
+                              with_errno ? std::strerror( errcode ) : "" );
+    if( len < (int)rbuf.size() ) break;		// the whole line fits
+    if( !rbuf.resize( len + 1 ) ) break;
+    ++attempt;
+    }
+  return true;
+  }
+
+/* Print to stderr the line "<program_name>: <filename>: <msg>", followed by
+   ": <description of errcode>" if errcode > 0. Nothing is printed if
+   verbosity < 0 or if 'msg' is null or empty. */
+void show_file_error( const char * const filename, const char * const msg,
+                      const int errcode )
+  {
+  if( verbosity < 0 ) return;
+  if( !msg || !msg[0] ) return;
+  const bool with_errno = ( errcode > 0 );
+  const char * const sep = with_errno ? ": " : "";
+  const char * const reason = with_errno ? std::strerror( errcode ) : "";
+  std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
+                sep, reason );
+  }
+
+
+void internal_error( const char * const msg )
+ {
+ if( verbosity >= 0 )
+ std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
+ std::exit( 3 );
+ }
+
+
+int main( const int argc, const char * const argv[] )	// parse the command line, then run the selected operation
+ {
+ if( argc > 0 ) invocation_name = argv[0];	// name shown in the 'Try ... --help' hint
+
+ enum { opt_ano = 256, opt_aso, opt_bso, opt_chk, opt_crc, opt_dbg, opt_del,	// codes of options without a short name
+ opt_dso, opt_exc, opt_grp, opt_hlp, opt_iid, opt_imd, opt_kd, opt_mti,
+ opt_nso, opt_ofl, opt_out, opt_own, opt_per, opt_sol, opt_un, opt_wn };
+ const Arg_parser::Option options[] =
+ {
+ { '0', 0, Arg_parser::no },
+ { '1', 0, Arg_parser::no },
+ { '2', 0, Arg_parser::no },
+ { '3', 0, Arg_parser::no },
+ { '4', 0, Arg_parser::no },
+ { '5', 0, Arg_parser::no },
+ { '6', 0, Arg_parser::no },
+ { '7', 0, Arg_parser::no },
+ { '8', 0, Arg_parser::no },
+ { '9', 0, Arg_parser::no },
+ { 'A', "concatenate", Arg_parser::no },
+ { 'B', "data-size", Arg_parser::yes },
+ { 'c', "create", Arg_parser::no },
+ { 'C', "directory", Arg_parser::yes },
+ { 'd', "diff", Arg_parser::no },
+ { 'f', "file", Arg_parser::yes },
+ { 'h', "dereference", Arg_parser::no },
+ { 'H', "format", Arg_parser::yes },
+ { 'n', "threads", Arg_parser::yes },
+ { 'o', "output", Arg_parser::yes },
+ { 'p', "preserve-permissions", Arg_parser::no },
+ { 'q', "quiet", Arg_parser::no },
+ { 'r', "append", Arg_parser::no },
+ { 't', "list", Arg_parser::no },
+ { 'v', "verbose", Arg_parser::no },
+ { 'V', "version", Arg_parser::no },
+ { 'x', "extract", Arg_parser::no },
+ { 'z', "compress", Arg_parser::no },
+ { opt_ano, "anonymous", Arg_parser::no },
+ { opt_aso, "asolid", Arg_parser::no },
+ { opt_bso, "bsolid", Arg_parser::no },
+ { opt_chk, "check-lib", Arg_parser::no },
+ { opt_dbg, "debug", Arg_parser::yes },
+ { opt_del, "delete", Arg_parser::no },
+ { opt_dso, "dsolid", Arg_parser::no },
+ { opt_exc, "exclude", Arg_parser::yes },
+ { opt_grp, "group", Arg_parser::yes },
+ { opt_hlp, "help", Arg_parser::no },
+ { opt_iid, "ignore-ids", Arg_parser::no },
+ { opt_imd, "ignore-metadata", Arg_parser::no },
+ { opt_kd, "keep-damaged", Arg_parser::no },
+ { opt_crc, "missing-crc", Arg_parser::no },
+ { opt_mti, "mtime", Arg_parser::yes },
+ { opt_nso, "no-solid", Arg_parser::no },
+ { opt_ofl, "ignore-overflow", Arg_parser::no },
+ { opt_out, "out-slots", Arg_parser::yes },
+ { opt_own, "owner", Arg_parser::yes },
+ { opt_per, "permissive", Arg_parser::no },
+ { opt_sol, "solid", Arg_parser::no },
+ { opt_un, "uncompressed", Arg_parser::no },
+ { opt_wn, "warn-newer", Arg_parser::no },
+ { 0, 0, Arg_parser::no } };
+
+ const Arg_parser parser( argc, argv, options, true ); // in_order
+ if( parser.error().size() ) // bad option
+ { show_error( parser.error().c_str(), 0, true ); return 1; }
+ Cl_options cl_opts( parser );
+
+ const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) );	// at least 1 even if sysconf fails
+ long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
+ if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )	// keep max_workers * sizeof (pthread_t) within int
+ max_workers = INT_MAX / sizeof (pthread_t);
+
+ const char * f_pn = 0;	// parsed name of '-f' if given (used in diagnostics)
+ const char * o_pn = 0;	// parsed name of '-o' if given
+ const char * z_pn = 0;	// parsed name of '-z' if given
+ for( int argind = 0; argind < parser.arguments(); ++argind )
+ {
+ const int code = parser.code( argind );
+ if( !code ) // skip non-options
+ {
+ if( parser.argument( argind ).empty() )
+ { show_error( "Empty non-option argument." ); return 1; }
+ if( parser.argument( argind ) != "-" ) cl_opts.filenames_given = true;
+ ++cl_opts.num_files; continue;
+ }
+ const char * const pn = parser.parsed_name( argind ).c_str();
+ const std::string & sarg = parser.argument( argind );
+ const char * const arg = sarg.c_str();
+ switch( code )
+ {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ cl_opts.set_level( code - '0' ); break;
+ case 'A': set_mode( cl_opts.program_mode, m_concatenate ); break;
+ case 'B': cl_opts.data_size =
+ getnum( arg, pn, min_data_size, max_data_size ); break;
+ case 'c': set_mode( cl_opts.program_mode, m_create ); break;
+ case 'C': break; // skip chdir
+ case 'd': set_mode( cl_opts.program_mode, m_diff ); break;
+ case 'f': set_archive_name( cl_opts.archive_name, sarg ); f_pn = pn; break;
+ case 'h': cl_opts.dereference = true; break;
+ case 'H': break; // ignore format
+ case 'n': cl_opts.num_workers = getnum( arg, pn, 0, max_workers ); break;
+ case 'o': cl_opts.output_filename = sarg; o_pn = pn; break;
+ case 'p': cl_opts.preserve_permissions = true; break;
+ case 'q': verbosity = -1; break;
+ case 'r': set_mode( cl_opts.program_mode, m_append ); break;
+ case 't': set_mode( cl_opts.program_mode, m_list ); break;
+ case 'v': if( verbosity < 4 ) ++verbosity; break;
+ case 'V': show_version(); return 0;
+ case 'x': set_mode( cl_opts.program_mode, m_extract ); break;
+ case 'z': set_mode( cl_opts.program_mode, m_compress ); z_pn = pn; break;
+ case opt_ano: cl_opts.uid = parse_owner( "root", pn );
+ cl_opts.gid = parse_group( "root", pn ); break;
+ case opt_aso: cl_opts.solidity = asolid; break;
+ case opt_bso: cl_opts.solidity = bsolid; break;
+ case opt_crc: cl_opts.missing_crc = true; break;
+ case opt_chk: return check_lib();
+ case opt_dbg: cl_opts.debug_level = getnum( arg, pn, 0, 3 ); break;
+ case opt_del: set_mode( cl_opts.program_mode, m_delete ); break;
+ case opt_dso: cl_opts.solidity = dsolid; break;
+ case opt_exc: Exclude::add_pattern( sarg ); break;
+ case opt_grp: cl_opts.gid = parse_group( arg, pn ); break;
+ case opt_hlp: show_help( num_online ); return 0;
+ case opt_iid: cl_opts.ignore_ids = true; break;
+ case opt_imd: cl_opts.ignore_metadata = true; break;
+ case opt_kd: cl_opts.keep_damaged = true; break;
+ case opt_mti: cl_opts.mtime = parse_mtime( arg, pn );
+ cl_opts.mtime_set = true; break;
+ case opt_nso: cl_opts.solidity = no_solid; break;
+ case opt_ofl: cl_opts.ignore_overflow = true; break;
+ case opt_out: cl_opts.out_slots = getnum( arg, pn, 1, 1024 ); break;
+ case opt_own: cl_opts.uid = parse_owner( arg, pn ); break;
+ case opt_per: cl_opts.permissive = true; break;
+ case opt_sol: cl_opts.solidity = solid; break;
+ case opt_un: cl_opts.set_level( -1 ); break;
+ case opt_wn: cl_opts.warn_newer = true; break;
+ default: internal_error( "uncaught option." );
+ }
+ } // end process options
+
+ if( cl_opts.program_mode != m_compress && o_pn )	// '-o' is accepted only together with '-z'
+ {
+ if( verbosity >= 0 )
+ std::fprintf( stderr, "%s: Option '%s' can only be used with "
+ "'-z, --compress'.\n", program_name, o_pn );
+ return 1;
+ }
+ if( cl_opts.program_mode == m_compress && f_pn )	// '-f' is rejected together with '-z'
+ {
+ if( verbosity >= 0 )
+ std::fprintf( stderr, "%s: Option '%s' can't be used with '%s'.\n",
+ program_name, f_pn, z_pn );
+ return 1;
+ }
+ if( cl_opts.program_mode == m_compress && cl_opts.uncompressed() )
+ {
+ if( verbosity >= 0 )
+ std::fprintf( stderr, "%s: Option '--uncompressed' can't be used with '%s'.\n",
+ program_name, z_pn );
+ return 1;
+ }
+
+#if !defined LZ_API_VERSION || LZ_API_VERSION < 1012 // compile-time test
+#error "lzlib 1.12 or newer needed."
+#endif
+ if( LZ_api_version() < 1012 ) // runtime test
+ { show_error( "Wrong library version. At least lzlib 1.12 is required." );
+ return 1; }
+
+#if defined __OS2__
+ setmode( STDIN_FILENO, O_BINARY );
+ setmode( STDOUT_FILENO, O_BINARY );
+#endif
+
+ if( cl_opts.data_size <= 0 && !cl_opts.uncompressed() )	// data_size not set: derive it from the compression level
+ {
+ if( cl_opts.level == 0 ) cl_opts.data_size = 1 << 20;
+ else cl_opts.data_size = 2 * option_mapping[cl_opts.level].dictionary_size;
+ }
+ if( cl_opts.num_workers < 0 ) // 0 disables multi-threading
+ cl_opts.num_workers = std::min( num_online, max_workers );
+
+ switch( cl_opts.program_mode )	// the result of the operation is the exit status
+ {
+ case m_none: show_error( "Missing operation.", 0, true ); return 1;
+ case m_append:
+ case m_create: return encode( cl_opts );
+ case m_compress: return compress( cl_opts );
+ case m_concatenate: return concatenate( cl_opts );
+ case m_delete: tzset(); return delete_members( cl_opts );
+ case m_diff:
+ case m_extract:
+ case m_list: tzset(); return decode( cl_opts );
+ }
+ }
diff --git a/tarlz.h b/tarlz.h
new file mode 100644
index 0000000..16ae6e0
--- /dev/null
+++ b/tarlz.h
@@ -0,0 +1,608 @@
+/* Tarlz - Archiver with multimember lzip compression
+ Copyright (C) 2013-2024 Antonio Diaz Diaz.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <climits>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <stdint.h>
+
+#define max_file_size ( LLONG_MAX - header_size )
+enum { header_size = 512, // size in bytes of one Tar_header block
+ max_edata_size = ( INT_MAX / header_size - 2 ) * header_size }; // a multiple of header_size, two blocks below the largest such multiple that fits in an int
+typedef uint8_t Tar_header[header_size]; // raw bytes of one header block
+
+enum Offsets { // byte offset of each field from the start of a Tar_header
+ name_o = 0, mode_o = 100, uid_o = 108, gid_o = 116, size_o = 124,
+ mtime_o = 136, chksum_o = 148, typeflag_o = 156, linkname_o = 157,
+ magic_o = 257, version_o = 263, uname_o = 265, gname_o = 297,
+ devmajor_o = 329, devminor_o = 337, prefix_o = 345 };
+
+enum Lengths { // size in bytes of each field; every *_o offset plus its *_l length equals the next field's offset
+ name_l = 100, mode_l = 8, uid_l = 8, gid_l = 8, size_l = 12,
+ mtime_l = 12, chksum_l = 8, typeflag_l = 1, linkname_l = 100,
+ magic_l = 6, version_l = 2, uname_l = 32, gname_l = 32,
+ devmajor_l = 8, devminor_l = 8, prefix_l = 155 }; // prefix_o + prefix_l = 500, inside the 512-byte block
+
+enum Typeflag { // ASCII codes naming the kind of archive member; presumably the byte stored at typeflag_o (confirm against users)
+ tf_regular = '0', tf_link = '1', tf_symlink = '2', tf_chardev = '3',
+ tf_blockdev = '4', tf_directory = '5', tf_fifo = '6', tf_hiperf = '7',
+ tf_global = 'g', tf_extended = 'x' };
+
+const uint8_t ustar_magic[magic_l] =
+ { 0x75, 0x73, 0x74, 0x61, 0x72, 0 }; // "ustar\0"
+
+// Return true if the magic field of 'header' is identical to ustar_magic.
+// All magic_l bytes are compared, including the terminating null byte.
+inline bool check_ustar_magic( const Tar_header header )
+  {
+  const uint8_t * const field = header + magic_o;
+  return !std::memcmp( field, ustar_magic, magic_l );
+  }
+
+// Zero the whole header block, then set the magic and version fields.
+inline void init_tar_header( Tar_header header )
+  {
+  std::memset( header, 0, header_size );
+  // Only the first magic_l - 1 bytes of the magic are copied; the last
+  // byte of the field keeps the zero written by the memset above.
+  std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );
+  header[version_o] = '0';
+  header[version_o+1] = '0';
+  }
+
+// Store the 'size' least significant octal digits of 'num' in 'buf' as
+// ASCII, most significant digit first and padded with leading '0's.
+// No terminator is written; digits that do not fit are dropped.
+inline void print_octal( uint8_t * const buf, int size, unsigned long long num )
+  {
+  for( int i = size - 1; i >= 0; --i )
+    {
+    buf[i] = '0' + ( num & 7 );
+    num >>= 3;
+    }
+  }
+
+
+// Round "size" up to a multiple of header size (512).
+// A size that is already a multiple is returned unchanged.
+//
+inline unsigned long long round_up( const unsigned long long size )
+  {
+  const int excess = size % header_size;
+  if( excess == 0 ) return size;
+  return size + ( header_size - excess );
+  }
+
+
+// Return the number of decimal digits needed to print 'value' (1 for 0).
+inline int decimal_digits( unsigned long long value )
+  {
+  int digits = 1;
+  for( ; value >= 10; value /= 10 ) ++digits;
+  return digits;
+  }
+
+
+// Return true if a ".." path component starts at filename[i]: the two dots
+// must begin the name or follow a '/', and must end the name or precede a '/'.
+inline bool dotdot_at_i( const char * const filename, const int i )
+  {
+  if( filename[i] != '.' || filename[i+1] != '.' ) return false;
+  if( i != 0 && filename[i-1] != '/' ) return false;
+  const char after = filename[i+2];
+  return after == 0 || after == '/';
+  }
+
+
+// Return true if dotdot_at_i is true at any position of 'filename',
+// that is, if some '/'-separated component of the name is exactly "..".
+inline bool contains_dotdot( const char * const filename )
+  {
+  int i = 0;
+  while( filename[i] != 0 )
+    {
+    if( dotdot_at_i( filename, i ) ) return true;
+    ++i;
+    }
+  return false;
+  }
+
+
+/* Buffer allocated with malloc that can grow but never shrinks.
+   If the initial allocation fails, the buffer is left empty (size 0). */
+class Resizable_buffer
+  {
+  char * p;
+  unsigned long size_;			// size_ < LONG_MAX
+
+public:
+  // must be >= 87 for format_member_name
+  enum { default_initial_size = 2 * header_size };
+
+  explicit Resizable_buffer( const unsigned long initial_size =
+                             default_initial_size )
+    : p( static_cast< char * >( std::malloc( initial_size ) ) ),
+      size_( p ? initial_size : 0 ) {}
+
+  // free accepts a null pointer, so no test is needed here
+  ~Resizable_buffer() { std::free( p ); p = 0; size_ = 0; }
+
+  /* Make sure that the buffer holds at least new_size bytes.
+     Return false, leaving the buffer unchanged, if new_size >= LONG_MAX
+     or if realloc fails. */
+  bool resize( const unsigned long long new_size )
+    {
+    if( new_size >= LONG_MAX ) return false;
+    if( new_size <= size_ ) return true;	// already large enough
+    char * const grown = static_cast< char * >( std::realloc( p, new_size ) );
+    if( !grown ) return false;
+    p = grown;
+    size_ = new_size;
+    return true;
+    }
+
+  // access to the buffer as char or as uint8_t
+  char * operator()() { return p; }
+  const char * operator()() const { return p; }
+  uint8_t * u8() { return reinterpret_cast< uint8_t * >( p ); }
+  const uint8_t * u8() const { return reinterpret_cast< const uint8_t * >( p ); }
+  unsigned long size() const { return size_; }
+  };
+
+
+// Return true if 'uid' is in the range [0, 2^21 - 1].   (also for gid)
+inline bool uid_in_ustar_range( const long long uid )
+  {
+  const long long limit = 1 << 21;
+  return 0 <= uid && uid < limit;
+  }
+
+// Return true if 'seconds' is in the range [0, 2^33 - 1].
+inline bool time_in_ustar_range( const long long seconds )
+  {
+  const long long limit = 1LL << 33;
+  return 0 <= seconds && seconds < limit;
+  }
+
+
+/* Time since (or before) the epoch, kept as seconds plus nanoseconds.
+   The sign of the seconds field applies to the whole time value.
+   A nanoseconds value out of range means an invalid time. */
+class Etime
+  {
+  long long sec_;
+  int nsec_;			// range [0, 999_999_999]
+
+public:
+  Etime() : sec_( 0 ), nsec_( -1 ) {}		// a new Etime is invalid
+
+  void reset() { sec_ = 0; nsec_ = -1; }	// make the time invalid again
+  void set( const long long s ) { sec_ = s; nsec_ = 0; }  // whole seconds
+
+  long long sec() const { return sec_; }
+  int nsec() const { return nsec_; }
+
+  bool isvalid() const { return 0 <= nsec_ && nsec_ < 1000000000; }
+
+  // an invalid time is never reported as out of range
+  bool out_of_ustar_range() const
+    {
+    if( !isvalid() ) return false;
+    return !time_in_ustar_range( sec_ );
+    }
+
+  unsigned decimal_size() const;
+  unsigned print( char * const buf ) const;
+  bool parse( const char * const ptr, const char ** const tailp,
+              const int size );
+  };
+
+
+class Extended // stores metadata from/for extended records, plus cached sizes of their formatted form
+ {
+ static std::vector< std::string > unknown_keywords; // already diagnosed
+ std::string linkpath_; // these are the real metadata
+ std::string path_;
+ long long file_size_; // >= 0 && <= max_file_size
+ long long uid_, gid_; // may not fit in unsigned int; -1 until set_uid/set_gid stores a value
+ Etime atime_, mtime_;
+
+ // cached sizes; if full_size_ <= -4 they must be recalculated (the setters below store -4 to request this)
+ mutable int edsize_; // extended data size
+ mutable int padded_edsize_; // edsize rounded up
+ mutable int full_size_; // header + padded edsize
+ mutable int linkpath_recsize_;
+ mutable int path_recsize_;
+ mutable int file_size_recsize_;
+ mutable int uid_recsize_;
+ mutable int gid_recsize_;
+ mutable int atime_recsize_;
+ mutable int mtime_recsize_;
+
+ // true if CRC present in parsed or formatted records
+ mutable bool crc_present_;
+
+ void calculate_sizes() const; // called by full_size() when full_size_ <= -4
+ void unknown_keyword( const char * const buf, const int size ) const;
+
+public:
+ static const std::string crc_record;
+ std::string removed_prefix;
+
+ Extended() // no metadata: ids at -1, file size and all cached sizes at 0
+ : file_size_( 0 ), uid_( -1 ), gid_( -1 ), edsize_( 0 ),
+ padded_edsize_( 0 ), full_size_( 0 ), linkpath_recsize_( 0 ),
+ path_recsize_( 0 ), file_size_recsize_( 0 ), uid_recsize_( 0 ),
+ gid_recsize_( 0 ), atime_recsize_( 0 ), mtime_recsize_( 0 ),
+ crc_present_( false ) {}
+
+ void reset() // restore the values set by the constructor and clear removed_prefix
+ { linkpath_.clear(); path_.clear(); file_size_ = 0; uid_ = -1; gid_ = -1;
+ atime_.reset(); mtime_.reset(); edsize_ = 0; padded_edsize_ = 0;
+ full_size_ = 0; linkpath_recsize_ = 0; path_recsize_ = 0;
+ file_size_recsize_ = 0; uid_recsize_ = 0; gid_recsize_ = 0;
+ atime_recsize_ = 0; mtime_recsize_ = 0; crc_present_ = false;
+ removed_prefix.clear(); }
+
+ const std::string & linkpath() const { return linkpath_; }
+ const std::string & path() const { return path_; }
+ long long file_size() const { return file_size_; }
+ long long get_file_size_and_reset( const Tar_header header );
+ long long get_uid() const { return uid_; }
+ long long get_gid() const { return gid_; }
+ const Etime & atime() const { return atime_; }
+ const Etime & mtime() const { return mtime_; }
+
+ void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -4; }
+ void path( const char * const p ) { path_ = p; full_size_ = -4; }
+ void file_size( const long long fs ) { full_size_ = -4; // a size outside [0, max_file_size] is stored as 0
+ file_size_ = ( fs >= 0 && fs <= max_file_size ) ? fs : 0; }
+ bool set_uid( const long long id ) // a negative id is rejected: nothing is stored and false is returned
+ { if( id >= 0 ) { uid_ = id; full_size_ = -4; } return id >= 0; }
+ bool set_gid( const long long id ) // same contract as set_uid
+ { if( id >= 0 ) { gid_ = id; full_size_ = -4; } return id >= 0; }
+ void set_atime( const long long s ) { atime_.set( s ); full_size_ = -4; }
+ void set_mtime( const long long s ) { mtime_.set( s ); full_size_ = -4; }
+
+ /* Return the size of the extended block, or 0 if empty.
+ Return -1 if error, -2 if out of memory, -3 if block too long.
+ The sizes are recalculated first if a setter has invalidated them. */
+ int full_size() const
+ { if( full_size_ <= -4 ) calculate_sizes(); return full_size_; }
+ int format_block( Resizable_buffer & rbuf ) const;
+ const char * full_size_error() const;
+
+ bool crc_present() const { return crc_present_; }
+ bool parse( const char * const buf, const int edsize,
+ const bool permissive );
+ void fill_from_ustar( const Tar_header header );
+ };
+
+
+/* Table-driven CRC-32 with a selectable polynomial.
+   update_byte and update_buf work on the raw CRC register passed by the
+   caller. compute_crc and windowed_crc start the register at 0xFFFFFFFF
+   and return it inverted. */
+class CRC32
+  {
+  uint32_t data[256];		// Table of CRCs of all 8-bit messages.
+
+public:
+  // Build the table for CRC32-C if 'castagnoli', else for IEEE 802.3.
+  CRC32( const bool castagnoli = false )
+    {
+    const unsigned poly = castagnoli ?
+                          0x82F63B78U :	// CRC32-C Castagnoli polynomial.
+                          0xEDB88320U;	// IEEE 802.3 Ethernet polynomial.
+
+    for( unsigned n = 0; n < 256; ++n )
+      {
+      unsigned c = n;
+      for( int bits = 8; bits > 0; --bits )
+        c = ( c & 1 ) ? ( poly ^ ( c >> 1 ) ) : ( c >> 1 );
+      data[n] = c;
+      }
+    }
+
+  // Feed one byte into the register 'crc'.
+  void update_byte( uint32_t & crc, const uint8_t byte ) const
+    { crc = ( crc >> 8 ) ^ data[(crc^byte)&0xFF]; }
+
+  // Feed 'size' bytes into the register 'crc'; does nothing if size <= 0.
+  // about as fast as it is possible without messing with endianness
+  void update_buf( uint32_t & crc, const uint8_t * const buffer,
+                   const int size ) const
+    {
+    uint32_t c = crc;			// work on a local copy
+    int i = 0;
+    while( i < size )
+      { c = ( c >> 8 ) ^ data[(c^buffer[i])&0xFF]; ++i; }
+    crc = c;
+    }
+
+  // Return the finished CRC of the first 'size' bytes of 'buffer'.
+  uint32_t compute_crc( const uint8_t * const buffer, const int size ) const
+    {
+    uint32_t crc = 0xFFFFFFFFU;
+    update_buf( crc, buffer, size );
+    return crc ^ 0xFFFFFFFFU;
+    }
+
+  // Calculates the crc of size bytes except a window of 8 bytes at pos
+  uint32_t windowed_crc( const uint8_t * const buffer, const int pos,
+                         const int size ) const
+    {
+    const int window = 8;
+    uint32_t crc = 0xFFFFFFFFU;
+    update_buf( crc, buffer, pos );				// before the window
+    update_buf( crc, buffer + pos + window, size - pos - window );	// after it
+    return crc ^ 0xFFFFFFFFU;
+    }
+  };
+
+
+struct Lzma_options // the two LZMA parameters tied to a compression level
+ {
+ int dictionary_size; // 4 KiB .. 512 MiB
+ int match_len_limit; // 5 .. 273
+ };
+const Lzma_options option_mapping[] = // indexed by compression level, 0 to 9
+ {
+ { 65535, 16 }, // -0
+ { 1 << 20, 5 }, // -1
+ { 3 << 19, 6 }, // -2
+ { 1 << 21, 8 }, // -3
+ { 3 << 20, 12 }, // -4
+ { 1 << 22, 20 }, // -5
+ { 1 << 23, 36 }, // -6
+ { 1 << 24, 68 }, // -7
+ { 3 << 23, 132 }, // -8
+ { 1 << 25, 273 } }; // -9
+
+
+enum {
+ min_dictionary_bits = 12,
+ min_dictionary_size = 1 << min_dictionary_bits, // 4 KiB
+ max_dictionary_bits = 29,
+ max_dictionary_size = 1 << max_dictionary_bits, // 512 MiB
+ min_member_size = 36,
+ min_data_size = 2 * min_dictionary_size, // 8 KiB
+ max_data_size = 2 * max_dictionary_size }; // 1 GiB
+
+
+// Return true if 'dictionary_size' is in the range
+// [min_dictionary_size, max_dictionary_size].
+inline bool isvalid_ds( const unsigned dictionary_size )
+  {
+  if( dictionary_size < min_dictionary_size ) return false;
+  return dictionary_size <= max_dictionary_size;
+  }
+
+
+const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 }; // "LZIP"
+
+/* Header of a lzip member. Layout of 'data':
+     0-3  magic bytes
+     4    version
+     5    coded dictionary size */
+struct Lzip_header
+  {
+  enum { size = 6 };
+  uint8_t data[size];
+
+  // Return true if the four magic bytes are identical to lzip_magic.
+  bool check_magic() const
+    { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); }
+
+  // Detect a (truncated) header: return true if sz > 0 and the first
+  // min( sz, 4 ) bytes match the beginning of lzip_magic.
+  bool check_prefix( const int sz ) const
+    {
+    for( int i = 0; i < sz && i < 4; ++i )
+      if( data[i] != lzip_magic[i] ) return false;
+    return ( sz > 0 );
+    }
+
+  // Detect a corrupt header: return true if exactly 2 or 3 of the
+  // 4 magic bytes match.
+  bool check_corrupt() const
+    {
+    int matches = 0;
+    for( int i = 0; i < 4; ++i )
+      if( data[i] == lzip_magic[i] ) ++matches;
+    return ( matches > 1 && matches < 4 );
+    }
+
+  uint8_t version() const { return data[4]; }
+  bool check_version() const { return ( data[4] == 1 ); }
+
+  /* Decode the dictionary size from data[5]. The result is not
+     validated here; see check(). */
+  unsigned dictionary_size() const
+    {
+    // Bits 4-0 are the exponent of the base size. It can be as large as
+    // 31, so shift an unsigned 1 instead of a signed one to avoid
+    // shifting into the sign bit of an int.
+    unsigned sz = 1U << ( data[5] & 0x1F );
+    // Bits 7-5 are the number of sixteenths of the base size to subtract.
+    if( sz > min_dictionary_size )
+      sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
+    return sz;
+    }
+
+  // Return true if magic, version, and dictionary size are all valid.
+  bool check() const
+    { return check_magic() && check_version() &&
+             isvalid_ds( dictionary_size() ); }
+  };
+
+
+/* Trailer of a lzip member. The three fields are stored least
+   significant byte first. Layout of 'data':
+     0-3    CRC32 of the uncompressed data
+     4-11   size of the uncompressed data
+     12-19  member size including header and trailer */
+struct Lzip_trailer
+  {
+  enum { size = 20 };
+  uint8_t data[size];
+
+  unsigned data_crc() const
+    {
+    unsigned crc = data[3];
+    for( int i = 2; i >= 0; --i ) crc = ( crc << 8 ) + data[i];
+    return crc;
+    }
+
+  unsigned long long data_size() const
+    {
+    unsigned long long dsize = data[11];
+    for( int i = 10; i >= 4; --i ) dsize = ( dsize << 8 ) + data[i];
+    return dsize;
+    }
+
+  unsigned long long member_size() const
+    {
+    unsigned long long msize = data[19];
+    for( int i = 18; i >= 12; --i ) msize = ( msize << 8 ) + data[i];
+    return msize;
+    }
+
+  /* Check internal consistency: return false if the three fields
+     cannot belong to the same member. */
+  bool check_consistency() const
+    {
+    const unsigned long long dsize = data_size();
+    const bool zero_crc = ( data_crc() == 0 );
+    const bool zero_size = ( dsize == 0 );
+    if( zero_crc != zero_size ) return false;	// both or none must be 0
+
+    const unsigned long long msize = member_size();
+    if( msize < min_member_size ) return false;
+
+    // Largest member size accepted for this data size. The limit is
+    // ignored if it is not larger than dsize (presumably because the
+    // arithmetic wrapped around).
+    const unsigned long long max_msize =
+      ( 9 * dsize + 7 ) / 8 + min_member_size;
+    if( max_msize > dsize && msize > max_msize ) return false;
+
+    // Largest data size accepted for this member size, with the same
+    // kind of guard as above.
+    const unsigned long long max_dsize = 7090 * ( msize - 26 ) - 1;
+    return !( max_dsize > msize && dsize > max_dsize );
+    }
+  };
+
+
+enum Program_mode { m_none, m_append, m_compress, m_concatenate, m_create,
+ m_delete, m_diff, m_extract, m_list }; // m_none: no operation has been selected
+enum Solidity { no_solid, bsolid, dsolid, asolid, solid };
+class Arg_parser;
+
+struct Cl_options // command-line options
+ {
+ const Arg_parser & parser;
+ std::string archive_name;
+ std::string output_filename;
+ long long mtime;
+ long long uid;
+ long long gid;
+ Program_mode program_mode;
+ Solidity solidity;
+ int data_size; // if still <= 0 after option parsing and not uncompressed, main.cc derives it from level
+ int debug_level;
+ int level; // compression level, < 0 means uncompressed
+ int num_files;
+ int num_workers; // start this many worker threads; 0 disables multi-threading, < 0 lets main.cc choose
+ int out_slots;
+ bool dereference;
+ bool filenames_given;
+ bool ignore_ids;
+ bool ignore_metadata;
+ bool ignore_overflow;
+ bool keep_damaged;
+ bool level_set; // compression level set in command line
+ bool missing_crc;
+ bool mtime_set;
+ bool permissive;
+ bool preserve_permissions;
+ bool warn_newer;
+
+ Cl_options( const Arg_parser & ap ) // defaults: level 6, bsolid, 64 out slots, no operation, all flags false
+ : parser( ap ), mtime( 0 ), uid( -1 ), gid( -1 ), program_mode( m_none ),
+ solidity( bsolid ), data_size( 0 ), debug_level( 0 ), level( 6 ),
+ num_files( 0 ), num_workers( -1 ), out_slots( 64 ), dereference( false ),
+ filenames_given( false ), ignore_ids( false ), ignore_metadata( false ),
+ ignore_overflow( false ), keep_damaged( false ), level_set( false ),
+ missing_crc( false ), mtime_set( false ), permissive( false ),
+ preserve_permissions( false ), warn_newer( false ) {}
+
+ void set_level( const int l ) { level = l; level_set = true; } // also records that the level was given explicitly
+
+ int compressed() const; // tri-state bool with error (-2)
+ bool uncompressed() const { return level < 0 || level > 9; } // true for any level outside 0 to 9
+ bool to_stdout() const { return output_filename == "-"; } // true if the output file name is "-"
+ };
+
+// Raise 'retval' to 'new_val'; a smaller or equal 'new_val' is ignored.
+inline void set_retval( int & retval, const int new_val )
+  {
+  if( new_val > retval )
+    retval = new_val;
+  }
+
+const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
+const char * const bad_dict_msg = "Invalid dictionary size in member header.";
+const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
+const char * const bad_hdr_msg = "Corrupt or invalid tar header.";
+const char * const gblrec_msg = "Error in global extended records.";
+const char * const extrec_msg = "Error in extended records.";
+const char * const miscrc_msg = "Missing CRC in extended records.";
+const char * const misrec_msg = "Missing extended records.";
+const char * const longrec_msg = "Extended records are too long.";
+const char * const end_msg = "Archive ends unexpectedly.";
+const char * const mem_msg = "Not enough memory.";
+const char * const mem_msg2 = "Not enough memory. Try a lower compression level.";
+const char * const fv_msg1 = "Format violation: extended header followed by EOA blocks.";
+const char * const fv_msg2 = "Format violation: extended header followed by global header.";
+const char * const fv_msg3 = "Format violation: consecutive extended headers found.";
+const char * const posix_msg = "This does not look like a POSIX tar archive.";
+const char * const posix_lz_msg = "This does not look like a POSIX tar.lz archive.";
+const char * const eclosa_msg = "Error closing archive";
+const char * const eclosf_msg = "Error closing file";
+const char * const nfound_msg = "Not found in archive.";
+const char * const seek_msg = "Seek error";
+const char * const werr_msg = "Write error";
+const char * const chdir_msg = "Error changing working directory";
+const char * const intdir_msg = "Failed to create intermediate directory";
+
+// defined in common.cc
+unsigned long long parse_octal( const uint8_t * const ptr, const int size );
+int readblock( const int fd, uint8_t * const buf, const int size );
+int writeblock( const int fd, const uint8_t * const buf, const int size );
+
+// defined in common_decode.cc
+bool block_is_zero( const uint8_t * const buf, const int size );
+bool format_member_name( const Extended & extended, const Tar_header header,
+ Resizable_buffer & rbuf, const bool long_format );
+bool show_member_name( const Extended & extended, const Tar_header header,
+ const int vlevel, Resizable_buffer & rbuf );
+bool check_skip_filename( const Cl_options & cl_opts,
+ std::vector< char > & name_pending,
+ const char * const filename, const int chdir_fd = -1 );
+bool make_dirs( const std::string & name );
+
+// defined in common_mutex.cc
+void exit_fail_mt( const int retval = 1 ); // terminate the program
+bool print_removed_prefix( const std::string & prefix,
+ std::string * const msgp = 0 );
+void set_error_status( const int retval );
+int final_exit_status( int retval, const bool show_msg = true );
+
+// defined in compress.cc
+void show_atpos_error( const char * const filename, const long long pos,
+ const bool isarchive );
+int compress( const Cl_options & cl_opts );
+
+// defined in create.cc
+bool copy_file( const int infd, const int outfd, const long long max_size = -1 );
+bool writeblock_wrapper( const int outfd, const uint8_t * const buffer,
+ const int size );
+bool write_eoa_records( const int outfd, const bool compressed );
+const char * remove_leading_dotslash( const char * const filename,
+ std::string * const removed_prefixp, const bool dotdot = false );
+bool fill_headers( const char * const filename, Extended & extended,
+ Tar_header header, long long & file_size, const int flag );
+bool block_is_full( const int extended_size,
+ const unsigned long long file_size,
+ const unsigned long long target_size,
+ unsigned long long & partial_data_size );
+unsigned ustar_chksum( const Tar_header header );
+bool check_ustar_chksum( const Tar_header header );
+bool has_lz_ext( const std::string & name );
+int concatenate( const Cl_options & cl_opts );
+int encode( const Cl_options & cl_opts );
+
+// defined in create_lz.cc
+int encode_lz( const Cl_options & cl_opts, const char * const archive_namep,
+ const int outfd );
+
+// defined in decode.cc
+bool compare_file_type( std::string & estr, std::string & ostr,
+ const Cl_options & cl_opts,
+ const Extended & extended, const Tar_header header );
+class Archive_reader_base;
+bool compare_file_contents( std::string & estr, std::string & ostr,
+ Archive_reader_base & ar, const long long file_size,
+ const char * const filename, const int infd2 );
+int decode( const Cl_options & cl_opts );
+
+// defined in decode_lz.cc
+struct Archive_descriptor;
+int decode_lz( const Cl_options & cl_opts, const Archive_descriptor & ad,
+ std::vector< char > & name_pending );
+
+// defined in delete.cc
+bool safe_seek( const int fd, const long long pos );
+int tail_copy( const Arg_parser & parser, const Archive_descriptor & ad,
+ std::vector< char > & name_pending, const long long istream_pos,
+ const int outfd, int retval );
+int delete_members( const Cl_options & cl_opts );
+
+// defined in delete_lz.cc
+int delete_members_lz( const Cl_options & cl_opts,
+ const Archive_descriptor & ad,
+ std::vector< char > & name_pending, const int outfd );
+
+// defined in exclude.cc
+namespace Exclude {
+void add_pattern( const std::string & arg );
+void clear();
+bool excluded( const char * const filename );
+} // end namespace Exclude
+
+// defined in extended.cc
+extern const CRC32 crc32c;
+
+// defined in lzip_index.cc
+int seek_read( const int fd, uint8_t * const buf, const int size,
+ const long long pos );
+
+// defined in main.cc
+extern int verbosity;
+extern const char * const program_name;
+struct stat;
+int hstat( const char * const filename, struct stat * const st,
+ const bool dereference );
+bool nonempty_arg( const Arg_parser & parser, const int i );
+int open_instream( const std::string & name );
+int open_outstream( const std::string & name, const bool create = true,
+ Resizable_buffer * const rbufp = 0, const bool force = true );
+void show_error( const char * const msg, const int errcode = 0,
+ const bool help = false );
+bool format_error( Resizable_buffer & rbuf, const int errcode,
+ const char * const format, ... );
+void print_error( const int errcode, const char * const format, ... );
+void format_file_error( std::string & estr, const char * const filename,
+ const char * const msg, const int errcode = 0 );
+bool format_file_error( Resizable_buffer & rbuf, const char * const filename,
+ const char * const msg, const int errcode = 0 );
+void show_file_error( const char * const filename, const char * const msg,
+ const int errcode = 0 );
+void internal_error( const char * const msg );
diff --git a/testsuite/check.sh b/testsuite/check.sh
new file mode 100755
index 0000000..9027bd5
--- /dev/null
+++ b/testsuite/check.sh
@@ -0,0 +1,1481 @@
+#! /bin/sh
+# check script for Tarlz - Archiver with multimember lzip compression
+# Copyright (C) 2013-2024 Antonio Diaz Diaz.
+#
+# This script is free software: you have unlimited permission
+# to copy, distribute, and modify it.
+
+LC_ALL=C
+export LC_ALL
+objdir=`pwd`
+testdir=`cd "$1" ; pwd`
+TARLZ="${objdir}"/tarlz
+framework_failure() { echo "failure in testing framework" ; exit 1 ; }
+
+if [ ! -f "${TARLZ}" ] || [ ! -x "${TARLZ}" ] ; then
+ echo "${TARLZ}: cannot execute"
+ exit 1
+fi
+
+[ -e "${TARLZ}" ] 2> /dev/null ||
+ {
+ echo "$0: a POSIX shell is required to run the tests"
+ echo "Try bash -c \"$0 $1 $2\""
+ exit 1
+ }
+
+if [ -d tmp ] ; then rm -rf tmp ; fi
+mkdir tmp
+cd "${objdir}"/tmp || framework_failure
+
+in="${testdir}"/test.txt
+in_lz="${testdir}"/test.txt.lz
+in_tar="${testdir}"/test.txt.tar
+in_tar_lz="${testdir}"/test.txt.tar.lz
+inbad1="${testdir}"/test_bad1.txt
+inbad2="${testdir}"/test_bad2.txt
+test3="${testdir}"/test3.tar
+test3_lz="${testdir}"/test3.tar.lz
+test3dir="${testdir}"/test3_dir.tar
+test3dir_lz="${testdir}"/test3_dir.tar.lz
+test3dot_lz="${testdir}"/test3_dot.tar.lz
+t155="${testdir}"/t155.tar
+t155_lz="${testdir}"/t155.tar.lz
+tlzit1="${testdir}"/tlz_in_tar1.tar
+tlzit2="${testdir}"/tlz_in_tar2.tar
+bad1="${testdir}"/test3_bad1.tar
+bad2="${testdir}"/test3_bad2.tar
+bad3="${testdir}"/test3_bad3.tar
+bad4="${testdir}"/test3_bad4.tar
+bad5="${testdir}"/test3_bad5.tar
+bad1_lz="${testdir}"/test3_bad1.tar.lz
+bad2_lz="${testdir}"/test3_bad2.tar.lz
+bad3_lz="${testdir}"/test3_bad3.tar.lz
+bad4_lz="${testdir}"/test3_bad4.tar.lz
+bad5_lz="${testdir}"/test3_bad5.tar.lz
+bad6_lz="${testdir}"/test3_bad6.tar.lz
+eoa="${testdir}"/eoa_blocks.tar
+eoa_lz="${testdir}"/eoa_blocks.tar.lz
+fail=0
+lwarnc=0
+test_failed() { fail=1 ; printf " $1" ; [ -z "$2" ] || printf "($2)" ; }
+is_compressed() { [ "`dd if="$1" bs=4 count=1 2> /dev/null`" = LZIP ] ; }
+is_uncompressed() { [ "`dd if="$1" bs=4 count=1 2> /dev/null`" != LZIP ] ; }
+cyg_symlink() { [ ${lwarnc} = 0 ] &&
+ printf "\nwarning: your OS follows symbolic links to directories even when tarlz asks it not to\n$1"
+ lwarnc=1 ; }
+
+# Description of test files for tarlz:
+# test.txt.tar.lz: 1 member (test.txt).
+# t155.tar[.lz]: directory + 3 links + file + EOA, all with 155 char names
+# t155_fv?.tar[.lz]: like t155.tar but with 3 kinds of format violations
+# t155_fv1.tar[.lz]: extra extended header before EOA blocks
+# t155_fv2.tar[.lz]: first extended header followed by global header
+# t155_fv3.tar[.lz]: consecutive extended headers in last member
+# t155_fv[456].tar.lz: like t155_fv[123].tar.lz but violation alone in member
+# tar_in_tlz1.tar.lz: 2 members (test.txt.tar test3.tar) 3 lzip members
+# tar_in_tlz2.tar.lz: 2 members (test.txt.tar test3.tar) 5 lzip members
+# ts_in_link.tar.lz: 4 symbolic links (link[1-4]) to / /dir/ dir/ dir(107/)
+# test_bad1.txt.tar.lz: truncated at offset 6000 (of 7495)
+# test_bad2.txt.tar.lz: byte at offset 6000 changed from 0x56 to 0x46
+# test3.tar[.lz]: 3 members (foo bar baz) + 2 zeroed 512-byte blocks
+# test3_dir.tar[.lz]: like test3.tar but members /dir/foo /dir/bar /dir/baz
+# test3_dot.tar.lz: 3 times 3 members ./foo ././bar ./././baz
+# the 3 central members with filename in extended header
+# test3_bad1.tar: byte at offset 259 changed from 't' to '0' (magic)
+# test3_bad2.tar: byte at offset 1283 changed from 't' to '0' (magic)
+# test3_bad3.tar: byte at offset 2559 changed from 0x00 to 0x20 (padding)
+# test3_bad4.tar: byte at offset 1283 changed from 't' to '0' (magic)
+# byte at offset 2307 changed from 't' to '0' (magic)
+# test3_bad5.tar: 510 zeros + "LZ" prepended to test3.tar (bogus lz header)
+# test3_bad1.tar.lz: byte at offset 2 changed from 'I' to 'i' (magic)
+# test3_bad2.tar.lz: byte at offset 49 changed from 0x49 to 0x69 (mid stream)
+# test3_bad3.tar.lz: byte at offset 176 changed from 0x7D to 0x6D (mid stream)
+# test3_bad4.tar.lz: combined damage of test3_bad2.tar.lz and test3_bad3.tar.lz
+# test3_bad5.tar.lz: [71-134] --> zeroed (first trailer + second header)
+# test3_bad6.tar.lz: 510 zeros prepended to test3.tar.lz (header in two blocks)
+# test3_eoa?.tar: like test3_eoa?.tar.lz but uncompressed
+# test3_eoa1.tar.lz: test3.tar.lz without EOA blocks
+# test3_eoa2.tar.lz: test3.tar.lz with only one EOA block
+# test3_eoa3.tar.lz: test3.tar.lz with one zeroed block between foo and bar
+# test3_eoa4.tar.lz: test3.tar.lz ended by extended header without EOA blocks
+# test3_eoa5.tar.lz: test3.tar.lz split extended bar member, without EOA blocks
+# test3_em?.tar.lz: test3.tar.lz with one empty lzip member at each position
+# test3_em6.tar.lz: test3.tar.lz preceded by four empty lzip members
+# test3_gh?.tar: test3.tar with global header at each position
+# test3_gh?.tar.lz: test3.tar.lz with global before bar split in 4 ways
+# test3_gh5.tar.lz: test3.tar.lz with global in lzip member before foo
+# test3_gh6.tar.lz: test3.tar.lz with global before foo in same member
+# test3_nn.tar[.lz]: test3.tar[.lz] with zeroed name (no name) in bar member
+# test3_sm?.tar.lz: test3.tar.lz with extended bar member split in 4 ways
+# tlz_in_tar1.tar: 1 member (test3.tar.lz) first magic damaged
+# tlz_in_tar2.tar: 2 members (foo test3.tar.lz) first magic damaged
+# ug32chars.tar.lz: 1 member (foo) with 32-character owner and group names
+# ug32767.tar.lz: 1 member (foo) with numerical-only owner and group
+
+# Note that multi-threaded --list succeeds with test_bad2.txt.tar.lz and
+# test3_bad3.tar.lz because their headers are intact.
+
+"${TARLZ}" --check-lib # just print warning
+[ $? != 2 ] || test_failed $LINENO # unless bad lzlib.h
+
+printf "testing tarlz-%s..." "$2"
+
+"${TARLZ}" -q -tf "${in}"
+[ $? = 2 ] || test_failed $LINENO
+"${TARLZ}" -q -tf "${in_lz}"
+[ $? = 2 ] || test_failed $LINENO
+"${TARLZ}" -q -tf "${in_tar_lz}" -f "${in_tar_lz}"
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -q -tf nx_file
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -tf 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -q -cf out.tar.lz
+[ $? = 1 ] || test_failed $LINENO
+[ ! -e out.tar.lz ] || test_failed $LINENO
+"${TARLZ}" -q -cf out.tar
+[ $? = 1 ] || test_failed $LINENO
+[ ! -e out.tar ] || test_failed $LINENO
+"${TARLZ}" -rf out.tar.lz || test_failed $LINENO
+[ ! -e out.tar.lz ] || test_failed $LINENO
+"${TARLZ}" -rf out.tar || test_failed $LINENO
+[ ! -e out.tar ] || test_failed $LINENO
+"${TARLZ}" -r || test_failed $LINENO
+"${TARLZ}" -q -rf out.tar.lz "${in}"
+[ $? = 1 ] || test_failed $LINENO
+[ ! -e out.tar.lz ] || test_failed $LINENO
+"${TARLZ}" -q -rf out.tar "${in}"
+[ $? = 1 ] || test_failed $LINENO
+[ ! -e out.tar ] || test_failed $LINENO
+"${TARLZ}" -q -c "${in}" nx_file > /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -q -c -C nx_dir "${in}"
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -q -x -C nx_dir "${test3_lz}"
+[ $? = 1 ] || test_failed $LINENO
+touch empty.tar.lz empty.tlz || framework_failure # list an empty lz file
+"${TARLZ}" -q -tf empty.tar.lz
+[ $? = 2 ] || test_failed $LINENO
+"${TARLZ}" -q -tf empty.tlz
+[ $? = 2 ] || test_failed $LINENO
+rm -f empty.tar.lz empty.tlz || framework_failure
+touch empty.tar || framework_failure # compress an empty archive
+"${TARLZ}" -q -z empty.tar
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e empty.tar.lz ] || test_failed $LINENO
+rm -f empty.tar empty.tar.lz || framework_failure
+"${TARLZ}" -q -cd # test mixed operations
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -q -cr
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -q -ct
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -q -cx
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -q -tx
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -q -ctx
+[ $? = 1 ] || test_failed $LINENO
+for i in A c d r t x -delete ; do # test -o with operations other than -z
+ "${TARLZ}" -q -$i -o -
+ [ $? = 1 ] || test_failed $LINENO $i
+done
+"${TARLZ}" -q -z -f -
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -q -z .
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -z -o - --uncompressed "${test3}" > /dev/null 2>&1
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -q -tf "${in_tar_lz}" "" # empty non-option argument
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" --help > /dev/null || test_failed $LINENO
+"${TARLZ}" -V > /dev/null || test_failed $LINENO
+"${TARLZ}" --bad_option -tf "${test3_lz}" 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" -tf 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+bad_dates='@-9223372036854775809 @9223372036854775808
+ -2147481749-01-01T00:00:00 2147483648-01-01T00:00:00
+ 2017-10-01T 2017-10 ./nx_file'
+for i in ${bad_dates} ; do
+ "${TARLZ}" -c --mtime="$i" "${in}" > /dev/null 2>&1
+ [ $? = 1 ] || test_failed $LINENO "$i"
+done
+"${TARLZ}" --owner=invalid_owner_name -tf "${test3_lz}" 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+"${TARLZ}" --group=invalid_group_name -tf "${test3_lz}" 2> /dev/null
+[ $? = 1 ] || test_failed $LINENO
+
+printf "\ntesting --list and --extract..."
+
+# test --list and --extract
+"${TARLZ}" -tf "${eoa_lz}" --missing-crc || test_failed $LINENO
+"${TARLZ}" -xf "${eoa_lz}" --missing-crc || test_failed $LINENO
+"${TARLZ}" -C nx_dir -tf "${in_tar}" > /dev/null || test_failed $LINENO
+"${TARLZ}" -xf "${in_tar}" --missing-crc || test_failed $LINENO
+cmp "${in}" test.txt || test_failed $LINENO
+rm -f test.txt || framework_failure
+"${TARLZ}" -tf "${in_tar_lz}" --missing-crc > /dev/null || test_failed $LINENO
+for i in 0 2 6 ; do
+ "${TARLZ}" -n$i -xf "${in_tar_lz}" --missing-crc || test_failed $LINENO $i
+ cmp "${in}" test.txt || test_failed $LINENO $i
+ rm -f test.txt || framework_failure
+done
+
+# test3 reference files for -t and -tv (list3, vlist3)
+"${TARLZ}" -tf "${test3}" > list3 || test_failed $LINENO
+"${TARLZ}" -tvf "${test3}" > vlist3 || test_failed $LINENO
+for i in 0 2 6 ; do
+ "${TARLZ}" -n$i -tf "${test3_lz}" > out || test_failed $LINENO $i
+ diff -u list3 out || test_failed $LINENO $i
+ "${TARLZ}" -n$i -tvf "${test3_lz}" > out || test_failed $LINENO $i
+ diff -u vlist3 out || test_failed $LINENO $i
+done
+rm -f out || framework_failure
+
+# test3 reference files for cmp
+cat "${testdir}"/rfoo > cfoo || framework_failure
+cat "${testdir}"/rbar > cbar || framework_failure
+cat "${testdir}"/rbaz > cbaz || framework_failure
+
+# test --list and --extract test3
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -xf "${test3}" --missing-crc || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+# time and mode comparison always fails on OS/2
+if "${TARLZ}" -df "${test3}" --ignore-ids ; then d_works=yes
+else printf "warning: some '--diff' tests will be skipped.\n"
+fi
+rm -f foo bar baz || framework_failure
+for i in 0 2 6 ; do
+ "${TARLZ}" -n$i -xf "${test3_lz}" --missing-crc || test_failed $LINENO $i
+ cmp cfoo foo || test_failed $LINENO $i
+ cmp cbar bar || test_failed $LINENO $i
+ cmp cbaz baz || test_failed $LINENO $i
+ rm -f foo bar baz || framework_failure
+ "${TARLZ}" -n$i -tvf "${test3_lz}" ./foo ./bar ./baz > out 2> /dev/null ||
+ test_failed $LINENO $i
+ diff -u vlist3 out || test_failed $LINENO $i
+ rm -f out || framework_failure
+ "${TARLZ}" -q -n$i -xf "${test3_lz}" ./foo ./bar ./baz || test_failed $LINENO $i
+ cmp cfoo foo || test_failed $LINENO $i
+ cmp cbar bar || test_failed $LINENO $i
+ cmp cbaz baz || test_failed $LINENO $i
+ rm -f foo bar baz || framework_failure
+ "${TARLZ}" -n$i -xf "${test3_lz}" foo/ bar// baz/// || test_failed $LINENO $i
+ cmp cfoo foo || test_failed $LINENO $i
+ cmp cbar bar || test_failed $LINENO $i
+ cmp cbaz baz || test_failed $LINENO $i
+ rm -f foo bar baz || framework_failure
+ "${TARLZ}" -q -n$i -xf "${test3dot_lz}" --missing-crc || test_failed $LINENO $i
+ cmp cfoo foo || test_failed $LINENO $i
+ cmp cbar bar || test_failed $LINENO $i
+ cmp cbaz baz || test_failed $LINENO $i
+ rm -f foo bar baz || framework_failure
+ "${TARLZ}" -q -n$i -tf "${test3dot_lz}" foo bar baz || test_failed $LINENO $i
+ "${TARLZ}" -q -n$i -xf "${test3dot_lz}" foo bar baz || test_failed $LINENO $i
+ cmp cfoo foo || test_failed $LINENO $i
+ cmp cbar bar || test_failed $LINENO $i
+ cmp cbaz baz || test_failed $LINENO $i
+ rm -f foo bar baz || framework_failure
+done
+
+# test -C in --diff and --extract
+for i in "${test3}" "${test3_lz}" ; do
+ mkdir dir1 dir2 dir3 || framework_failure
+ "${TARLZ}" -q -xf "$i" -C dir1 foo -C ../dir2 bar -C ../dir3 baz ||
+ test_failed $LINENO "$i"
+ cmp cfoo dir1/foo || test_failed $LINENO "$i"
+ cmp cbar dir2/bar || test_failed $LINENO "$i"
+ cmp cbaz dir3/baz || test_failed $LINENO "$i"
+ if [ "${d_works}" = yes ] ; then
+ "${TARLZ}" -df "$i" -C dir1 foo -C ../dir2 --ignore-ids bar \
+ -C ../dir3 baz || test_failed $LINENO "$i"
+ "${TARLZ}" -df "$i" -C dir3 baz -C ../dir2 bar -C ../dir1 foo \
+ --ignore-ids || test_failed $LINENO "$i"
+ fi
+ rm -rf dir1 dir2 dir3 || framework_failure
+done
+for i in "${test3dir}" "${test3dir_lz}" ; do
+ mkdir dir1 dir2 dir3 || framework_failure
+ "${TARLZ}" -q -xf "$i" -C dir2 dir/bar -C ../dir1 dir/foo \
+ -C ../dir3 dir/baz || test_failed $LINENO "$i"
+ cmp cfoo dir1/dir/foo || test_failed $LINENO "$i"
+ cmp cbar dir2/dir/bar || test_failed $LINENO "$i"
+ cmp cbaz dir3/dir/baz || test_failed $LINENO "$i"
+ if [ "${d_works}" = yes ] ; then
+ "${TARLZ}" -q -df "$i" --ignore-ids -C dir1 dir/foo -C ../dir2 dir/bar \
+ -C ../dir3 dir/baz || test_failed $LINENO "$i"
+ "${TARLZ}" -q -df "${test3}" -C dir1/dir foo -C ../../dir2/dir bar \
+ --ignore-ids -C ../../dir3/dir baz || test_failed $LINENO "$i"
+ fi
+ rm -rf dir1 dir2 dir3 || framework_failure
+done
+
+for i in "${test3dir}" "${test3dir_lz}" ; do
+ "${TARLZ}" -q -tf "$i" --missing-crc || test_failed $LINENO "$i"
+ "${TARLZ}" -q -xf "$i" --missing-crc || test_failed $LINENO "$i"
+ cmp cfoo dir/foo || test_failed $LINENO "$i"
+ cmp cbar dir/bar || test_failed $LINENO "$i"
+ cmp cbaz dir/baz || test_failed $LINENO "$i"
+ rm -rf dir || framework_failure
+ "${TARLZ}" -q -tf "$i" dir || test_failed $LINENO "$i"
+ "${TARLZ}" -q -xf "$i" dir || test_failed $LINENO "$i"
+ cmp cfoo dir/foo || test_failed $LINENO "$i"
+ cmp cbar dir/bar || test_failed $LINENO "$i"
+ cmp cbaz dir/baz || test_failed $LINENO "$i"
+ rm -rf dir || framework_failure
+ "${TARLZ}" -q -tf "$i" dir/foo dir/baz || test_failed $LINENO "$i"
+ "${TARLZ}" -q -xf "$i" dir/foo dir/baz || test_failed $LINENO "$i"
+ cmp cfoo dir/foo || test_failed $LINENO "$i"
+ [ ! -e dir/bar ] || test_failed $LINENO "$i"
+ cmp cbaz dir/baz || test_failed $LINENO "$i"
+ rm -rf dir || framework_failure
+done
+
+# test --extract --exclude
+"${TARLZ}" -xf "${test3}" --exclude='f*o' --exclude=baz || test_failed $LINENO
+[ ! -e foo ] || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+for i in 0 2 6 ; do
+ "${TARLZ}" -n$i -xf "${test3_lz}" --exclude=bar || test_failed $LINENO $i
+ cmp cfoo foo || test_failed $LINENO $i
+ [ ! -e bar ] || test_failed $LINENO $i
+ cmp cbaz baz || test_failed $LINENO $i
+ rm -f foo bar baz || framework_failure
+ "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='?ar' || test_failed $LINENO $i
+ cmp cfoo dir/foo || test_failed $LINENO $i
+ [ ! -e dir/bar ] || test_failed $LINENO $i
+ cmp cbaz dir/baz || test_failed $LINENO $i
+ rm -rf dir || framework_failure
+ "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude=dir/bar || test_failed $LINENO $i
+ cmp cfoo dir/foo || test_failed $LINENO $i
+ [ ! -e dir/bar ] || test_failed $LINENO $i
+ cmp cbaz dir/baz || test_failed $LINENO $i
+ rm -rf dir || framework_failure
+ "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude=dir || test_failed $LINENO $i
+ [ ! -e dir ] || test_failed $LINENO $i
+ rm -rf dir || framework_failure
+ "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='dir/*' || test_failed $LINENO $i
+ [ ! -e dir ] || test_failed $LINENO $i
+ rm -rf dir || framework_failure
+ "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='[bf][ao][orz]' ||
+ test_failed $LINENO $i
+ [ ! -e dir ] || test_failed $LINENO $i
+ rm -rf dir || framework_failure
+ "${TARLZ}" -q -n$i -xf "${test3dir_lz}" --exclude='*o' dir/foo ||
+ test_failed $LINENO $i
+ [ ! -e dir ] || test_failed $LINENO $i
+ rm -rf dir || framework_failure
+done
+
+# test --list and --extract EOA
+"${TARLZ}" -tvf "${testdir}"/test3_eoa1.tar > out 2> /dev/null
+[ $? = 2 ] || test_failed $LINENO
+diff -u vlist3 out || test_failed $LINENO
+"${TARLZ}" -tvf "${testdir}"/test3_eoa2.tar > out || test_failed $LINENO
+diff -u vlist3 out || test_failed $LINENO
+"${TARLZ}" -q -tf "${testdir}"/test3_eoa3.tar || test_failed $LINENO
+"${TARLZ}" -tvf "${testdir}"/test3_eoa4.tar > out 2> /dev/null
+[ $? = 2 ] || test_failed $LINENO
+diff -u vlist3 out || test_failed $LINENO
+for i in 0 2 6 ; do
+ "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa1.tar.lz > out 2> /dev/null
+ [ $? = 2 ] || test_failed $LINENO $i
+ diff -u vlist3 out || test_failed $LINENO $i
+ "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa2.tar.lz > out ||
+ test_failed $LINENO $i
+ diff -u vlist3 out || test_failed $LINENO $i
+ "${TARLZ}" -q -n$i -tf "${testdir}"/test3_eoa3.tar.lz ||
+ test_failed $LINENO $i
+ "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa4.tar.lz > out 2> /dev/null
+ [ $? = 2 ] || test_failed $LINENO $i
+ diff -u vlist3 out || test_failed $LINENO $i
+ "${TARLZ}" -n$i -tvf "${testdir}"/test3_eoa5.tar.lz > out 2> /dev/null
+ [ $? = 2 ] || test_failed $LINENO $i
+ diff -u vlist3 out || test_failed $LINENO $i
+done
+rm -f out || framework_failure
+#
+"${TARLZ}" -q -xf "${testdir}"/test3_eoa1.tar
+[ $? = 2 ] || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -xf "${testdir}"/test3_eoa2.tar || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -xf "${testdir}"/test3_eoa3.tar || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -q -xf "${testdir}"/test3_eoa4.tar
+[ $? = 2 ] || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+#
+for i in 0 2 6 ; do
+ "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eoa1.tar.lz
+ [ $? = 2 ] || test_failed $LINENO $i
+ cmp cfoo foo || test_failed $LINENO $i
+ cmp cbar bar || test_failed $LINENO $i
+ cmp cbaz baz || test_failed $LINENO $i
+ rm -f foo bar baz || framework_failure
+ "${TARLZ}" -n$i -xf "${testdir}"/test3_eoa2.tar.lz || test_failed $LINENO $i
+ cmp cfoo foo || test_failed $LINENO $i
+ cmp cbar bar || test_failed $LINENO $i
+ cmp cbaz baz || test_failed $LINENO $i
+ rm -f foo bar baz || framework_failure
+ "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eoa4.tar.lz
+ [ $? = 2 ] || test_failed $LINENO $i
+ cmp cfoo foo || test_failed $LINENO $i
+ cmp cbar bar || test_failed $LINENO $i
+ cmp cbaz baz || test_failed $LINENO $i
+ rm -f foo bar baz || framework_failure
+ "${TARLZ}" -q -n$i -xf "${testdir}"/test3_eoa5.tar.lz
+ [ $? = 2 ] || test_failed $LINENO $i
+ cmp cfoo foo || test_failed $LINENO $i
+ cmp cbar bar || test_failed $LINENO $i
+ cmp cbaz baz || test_failed $LINENO $i
+ rm -f foo bar baz || framework_failure
+done
+"${TARLZ}" -n0 -xf "${testdir}"/test3_eoa3.tar.lz || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+
+# test --list and --extract tar in tar.lz
+for i in "${testdir}"/tar_in_tlz1.tar.lz "${testdir}"/tar_in_tlz2.tar.lz ; do
+ for j in 0 2 6 ; do
+ "${TARLZ}" -tf "$i" -n$j > out$j ||
+ test_failed $LINENO "$i $j"
+ "${TARLZ}" -tvf "$i" -n$j > outv$j ||
+ test_failed $LINENO "$i $j"
+ done
+ diff -u out0 out2 || test_failed $LINENO "$i"
+ diff -u out0 out6 || test_failed $LINENO "$i"
+ diff -u out2 out6 || test_failed $LINENO "$i"
+ diff -u outv0 outv2 || test_failed $LINENO "$i"
+ diff -u outv0 outv6 || test_failed $LINENO "$i"
+ diff -u outv2 outv6 || test_failed $LINENO "$i"
+ rm -f out0 out2 out6 outv0 outv2 outv6 || framework_failure
+ for j in 0 2 6 ; do
+ "${TARLZ}" -xf "$i" -n$j || test_failed $LINENO "$i $j"
+ cmp "${in_tar}" test.txt.tar || test_failed $LINENO "$i $j"
+ cmp "${test3}" test3.tar || test_failed $LINENO "$i $j"
+ rm -f test.txt.tar test3.tar || framework_failure
+ done
+done
+
+# test --list and --extract with global headers uncompressed
+for i in gh1 gh2 gh3 gh4 ; do
+ "${TARLZ}" -tf "${testdir}"/test3_${i}.tar > out || test_failed $LINENO $i
+ diff -u list3 out || test_failed $LINENO $i
+ "${TARLZ}" -tvf "${testdir}"/test3_${i}.tar > out || test_failed $LINENO $i
+ diff -u vlist3 out || test_failed $LINENO $i
+ "${TARLZ}" -xf "${testdir}"/test3_${i}.tar || test_failed $LINENO $i
+ cmp cfoo foo || test_failed $LINENO $i
+ cmp cbar bar || test_failed $LINENO $i
+ cmp cbaz baz || test_failed $LINENO $i
+ rm -f foo bar baz out || framework_failure
+done
+
+# test --list and --extract with empty lzip members, global headers and
+# extended tar members split among lzip members
+for i in em1 em2 em3 em4 em5 em6 gh1 gh2 gh3 gh4 gh5 gh6 sm1 sm2 sm3 sm4 ; do
+ for j in 0 2 6 ; do
+ "${TARLZ}" -n$j -tf "${testdir}"/test3_${i}.tar.lz > out ||
+ test_failed $LINENO "$i $j"
+ diff -u list3 out || test_failed $LINENO "$i $j"
+ "${TARLZ}" -n$j -tvf "${testdir}"/test3_${i}.tar.lz > out ||
+ test_failed $LINENO "$i $j"
+ diff -u vlist3 out || test_failed $LINENO "$i $j"
+ done
+ rm -f out || framework_failure
+ for j in 0 2 6 ; do
+ "${TARLZ}" -n$j -xf "${testdir}"/test3_${i}.tar.lz ||
+ test_failed $LINENO "$i $j"
+ cmp cfoo foo || test_failed $LINENO "$i $j"
+ cmp cbar bar || test_failed $LINENO "$i $j"
+ cmp cbaz baz || test_failed $LINENO "$i $j"
+ rm -f foo bar baz || framework_failure
+ done
+done
+rm -f list3 vlist3 || framework_failure
+
+printf "\ntesting --concatenate..."
+
+# test --concatenate compressed
+cat "${in}" > out.tar.lz || framework_failure # invalid tar.lz
+"${TARLZ}" -Aqf out.tar.lz "${test3_lz}"
+[ $? = 2 ] || test_failed $LINENO
+cat "${in_tar_lz}" > out.tar.lz || framework_failure
+"${TARLZ}" -q --un -Af out.tar.lz "${test3_lz}" # contradictory ext
+[ $? = 1 ] || test_failed $LINENO
+cmp "${in_tar_lz}" out.tar.lz || test_failed $LINENO
+cat "${in_tar_lz}" > out.tar.lz || framework_failure
+"${TARLZ}" -Af out.tar.lz "${test3_lz}" || test_failed $LINENO
+"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
+cmp "${in}" test.txt || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f test.txt foo bar baz || framework_failure
+touch aout.tar.lz || framework_failure # concatenate to empty file
+"${TARLZ}" -Aqf aout.tar.lz "${in_tar}"
+[ $? = 2 ] || test_failed $LINENO
+"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -Af aout.tar.lz || test_failed $LINENO # concatenate nothing
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -Aqf aout.tar.lz aout.tar.lz || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -Aq "${in_tar_lz}" "${test3}" > aout.tar.lz # to stdout
+[ $? = 2 ] || test_failed $LINENO
+cmp "${in_tar_lz}" aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -A "${in_tar_lz}" "${test3_lz}" > aout.tar.lz || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+cat "${eoa_lz}" > aout.tar.lz || framework_failure
+"${TARLZ}" -Aqf aout.tar.lz "${in_tar}" # concatenate to empty archive
+[ $? = 2 ] || test_failed $LINENO
+"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+cat "${in_tar_lz}" > aout.tar.lz || framework_failure
+"${TARLZ}" -Aqf aout.tar.lz "${test3_lz}" "${test3}"
+[ $? = 2 ] || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+rm -f aout.tar.lz || framework_failure
+touch aout.tar.lz || framework_failure # --exclude
+"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" --exclude 'test3*' ||
+ test_failed $LINENO
+"${TARLZ}" -Af aout.tar.lz "${in_tar_lz}" "${test3_lz}" --exclude '*txt*' ||
+ test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+rm -f out.tar.lz aout.tar.lz || framework_failure
+
+# test --concatenate uncompressed
+cat "${in}" > out.tar || framework_failure # invalid tar
+"${TARLZ}" -Aqf out.tar "${test3}"
+[ $? = 2 ] || test_failed $LINENO
+cat "${in_tar}" > out.tar || framework_failure
+"${TARLZ}" -q -0 -Af out.tar "${test3}" # contradictory ext
+[ $? = 1 ] || test_failed $LINENO
+cmp "${in_tar}" out.tar || test_failed $LINENO
+cat "${in_tar}" > out.tar || framework_failure
+"${TARLZ}" -Af out.tar "${test3}" || test_failed $LINENO
+"${TARLZ}" -xf out.tar || test_failed $LINENO
+cmp "${in}" test.txt || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f test.txt foo bar baz || framework_failure
+touch aout.tar || framework_failure # concatenate to empty file
+"${TARLZ}" -Aqf aout.tar "${in_tar_lz}"
+[ $? = 2 ] || test_failed $LINENO
+"${TARLZ}" -Af aout.tar "${in_tar}" "${test3}" || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" -Af aout.tar || test_failed $LINENO # concatenate nothing
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" -Aqf aout.tar aout.tar || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" -Aq "${in_tar}" "${test3_lz}" > aout.tar # to stdout
+[ $? = 2 ] || test_failed $LINENO
+cmp "${in_tar}" aout.tar || test_failed $LINENO
+"${TARLZ}" -A "${in_tar}" "${test3}" > aout.tar || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+cat "${eoa}" > aout.tar || framework_failure # concatenate to empty archive
+"${TARLZ}" -Aqf aout.tar "${in_tar_lz}"
+[ $? = 2 ] || test_failed $LINENO
+"${TARLZ}" -Af aout.tar "${in_tar}" "${test3}" || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+cat "${in_tar}" > aout.tar || framework_failure
+"${TARLZ}" -Aqf aout.tar "${test3}" "${test3_lz}"
+[ $? = 2 ] || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+rm -f aout.tar || framework_failure
+touch aout.tar || framework_failure # --exclude
+"${TARLZ}" -Af aout.tar "${test3}" "${in_tar}" --exclude 'test3*' ||
+ test_failed $LINENO
+"${TARLZ}" -Af aout.tar "${test3}" "${in_tar}" --exclude '*txt*' ||
+ test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+rm -f out.tar aout.tar || framework_failure
+
+printf "\ntesting --create..."
+
+# test --create
+cat "${in}" > test.txt || framework_failure
+"${TARLZ}" --warn-newer -0 -cf out.tar.lz test.txt || test_failed $LINENO
+is_compressed out.tar.lz || test_failed $LINENO
+rm -f test.txt || framework_failure
+"${TARLZ}" -xf out.tar.lz --missing-crc || test_failed $LINENO
+cmp "${in}" test.txt || test_failed $LINENO
+cat "${in}" > test.txt || framework_failure
+"${TARLZ}" --warn-newer --un -cf out.tar test.txt || test_failed $LINENO
+is_uncompressed out.tar || test_failed $LINENO
+rm -f test.txt || framework_failure
+"${TARLZ}" -xf out.tar --missing-crc || test_failed $LINENO
+cmp "${in}" test.txt || test_failed $LINENO
+rm -f test.txt out.tar out.tar.lz || framework_failure
+
+cat cfoo > foo || framework_failure
+rm -f bar || framework_failure
+cat cbaz > baz || framework_failure
+"${TARLZ}" -0 -q -cf out.tar.lz foo bar baz
+[ $? = 1 ] || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -xf out.tar.lz --missing-crc || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -q -xf out.tar.lz bar
+[ $? = 1 ] || test_failed $LINENO
+[ ! -e foo ] || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+rm -f out.tar.lz || framework_failure
+
+cat cfoo > foo || framework_failure
+cat cbar > bar || framework_failure
+cat cbaz > baz || framework_failure
+"${TARLZ}" -0 -cf out.tar.lz foo bar baz --out-slots=1 || test_failed $LINENO
+"${TARLZ}" -0 -q -cf aout.tar.lz foo bar aout.tar.lz baz || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO # test reproducible
+rm -f aout.tar.lz || framework_failure
+#
+"${TARLZ}" -0 -cf aout.tar.lz foo bar baz -C / || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+rm -f aout.tar.lz || framework_failure
+"${TARLZ}" -0 -C / -cf aout.tar.lz -C "${objdir}"/tmp foo bar baz ||
+ test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+rm -f aout.tar.lz || framework_failure
+"${TARLZ}" --asolid -0 -cf aout.tar.lz foo bar baz || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+rm -f aout.tar.lz || framework_failure
+"${TARLZ}" -0 -q -cf aout.tar.lz foo/ ./bar ./baz/ || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+rm -f aout.tar.lz || framework_failure
+mkdir dir1 || framework_failure
+"${TARLZ}" -C dir1 -xf out.tar.lz || test_failed $LINENO
+cmp cfoo dir1/foo || test_failed $LINENO
+cmp cbar dir1/bar || test_failed $LINENO
+cmp cbaz dir1/baz || test_failed $LINENO
+rm -f aout.tar.lz foo bar baz || framework_failure
+"${TARLZ}" -C dir1 -0 -cf aout.tar.lz foo bar baz || test_failed $LINENO
+"${TARLZ}" -xf aout.tar.lz || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f aout.tar.lz foo bar baz || framework_failure
+"${TARLZ}" -C dir1 -0 -c foo bar baz | "${TARLZ}" -x || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f dir1/foo dir1/bar dir1/baz || framework_failure
+"${TARLZ}" -0 -c foo bar baz | "${TARLZ}" -C dir1 -x || test_failed $LINENO
+cmp cfoo dir1/foo || test_failed $LINENO
+cmp cbar dir1/bar || test_failed $LINENO
+cmp cbaz dir1/baz || test_failed $LINENO
+rm -f dir1/foo dir1/bar dir1/baz || framework_failure
+"${TARLZ}" -0 -c foo bar baz | "${TARLZ}" -x -C dir1 foo bar baz ||
+ test_failed $LINENO
+cmp cfoo dir1/foo || test_failed $LINENO
+cmp cbar dir1/bar || test_failed $LINENO
+cmp cbaz dir1/baz || test_failed $LINENO
+rm -f foo dir1/bar baz || framework_failure
+"${TARLZ}" -0 -cf aout.tar.lz -C dir1 foo -C .. bar -C dir1 baz ||
+ test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -0 -cf aout.tar.lz dir1/foo dir1/baz || test_failed $LINENO
+rm -rf dir1 bar || framework_failure
+"${TARLZ}" -xf aout.tar.lz dir1 || test_failed $LINENO
+cmp cfoo dir1/foo || test_failed $LINENO
+cmp cbaz dir1/baz || test_failed $LINENO
+rm -rf dir1 || framework_failure
+rm -f out.tar.lz aout.tar.lz || framework_failure
+
+# test --create --exclude
+cat cfoo > foo || framework_failure
+cat cbar > bar || framework_failure
+cat cbaz > baz || framework_failure
+"${TARLZ}" -0 -cf out.tar.lz foo bar baz --exclude 'ba?' || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+rm -f out.tar.lz foo bar baz || framework_failure
+cat cfoo > foo || framework_failure
+cat cbar > bar || framework_failure
+cat cbaz > baz || framework_failure
+"${TARLZ}" -cf out.tar foo bar baz --exclude 'ba*' || test_failed $LINENO
+is_uncompressed out.tar || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -xf out.tar || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+rm -f out.tar foo bar baz || framework_failure
+
+# test --create --mtime
+dates='@-9223372036854775808 @-9223372036854775807
+ -2147481748-12-31T23:59:59 -1970-01-01T00:00:00
+ 0000-01-01T00:00:00 0000-01-01T00:00:01 0000-01-02T00:00:00
+ 1697-10-17T11:03:27 1697-10-17T11:03:28 1697-10-17T11:03:29
+ 1833-11-24T17:31:43 1833-11-24T17:31:44 1833-11-24T17:31:45
+ 1901-12-13T20:45:51 1901-12-13T20:45:52 1901-12-13T20:45:53
+ 1901-12-14T20:45:51
+ 1969-12-31T23:59:58 1969-12-31T23:59:59
+ 1970-01-01T00:00:00 1970-01-01T00:00:01 @0
+ 2038-01-18T03:14:07 2038-01-19T03:14:07 2038-01-19T03:14:08
+ 2106-02-07T06:28:15 2106-02-07T06:28:16
+ 2242-03-16T12:56:31 2242-03-16T12:56:32 @8589934591 @8589934592
+ 9999-12-31T23:59:58 9999-12-31T23:59:59
+ 2147483647-12-31T23:59:59 @9223372036854775807'
+touch -d 2022-01-05T12:22:13 bar || framework_failure
+for i in ${dates} @-8Ei '2017-10-01 09:00:00' '2017-10-1 9:0:0' \
+ '2017-10-01 09:00' '2017-10-01 09' 2017-10-01 ./bar ; do
+ touch foo || framework_failure
+ "${TARLZ}" -cf out.tar --mtime="$i" foo || test_failed $LINENO "$i"
+ is_uncompressed out.tar || test_failed $LINENO "$i"
+ "${TARLZ}" -q -df out.tar && test_failed $LINENO "$i"
+ "${TARLZ}" -xf out.tar || test_failed $LINENO "$i"
+ if [ "${d_works}" = yes ] ; then
+ "${TARLZ}" -df out.tar --ignore-overflow || test_failed $LINENO "$i"
+ fi
+done
+rm -f out.tar foo bar || framework_failure
+
+mkdir dir || framework_failure
+for i in ${dates} ; do
+ # Skip a time stamp $i if it's out of range for this platform,
+ # or if it uses a notation that this platform does not recognize.
+ touch -d "$i" "dir/f$i" >/dev/null 2>&1 || continue
+done
+"${TARLZ}" -cf out.tar dir || test_failed $LINENO
+is_uncompressed out.tar || test_failed $LINENO
+"${TARLZ}" -df out.tar || test_failed $LINENO
+rm -rf out.tar dir || framework_failure
+
+printf "\ntesting --diff..."
+
+"${TARLZ}" -xf "${test3_lz}" || test_failed $LINENO
+"${TARLZ}" -cf out.tar foo || test_failed $LINENO
+"${TARLZ}" -cf aout.tar foo --anonymous || test_failed $LINENO
+is_uncompressed out.tar || test_failed $LINENO
+is_uncompressed aout.tar || test_failed $LINENO
+if cmp out.tar aout.tar > /dev/null ; then
+ printf "\nwarning: '--diff' test can't be run as root.\n"
+else
+ for i in 0 2 6 ; do
+ "${TARLZ}" -n$i -xf "${test3_lz}" || test_failed $LINENO $i
+ "${TARLZ}" -n$i -df "${test3_lz}" > out$i
+ [ $? = 1 ] || test_failed $LINENO $i
+ "${TARLZ}" -n$i -df "${test3_lz}" --ignore-ids || test_failed $LINENO $i
+ "${TARLZ}" -n$i -df "${test3_lz}" --exclude '*' || test_failed $LINENO $i
+ "${TARLZ}" -n$i -df "${in_tar_lz}" --exclude '*' || test_failed $LINENO $i
+ rm -f bar || framework_failure
+ "${TARLZ}" -n$i -df "${test3_lz}" --ignore-ids foo baz ||
+ test_failed $LINENO $i
+ "${TARLZ}" -n$i -df "${test3_lz}" --ignore-metadata foo baz ||
+ test_failed $LINENO $i
+ "${TARLZ}" -n$i -df "${test3_lz}" --exclude bar --ignore-ids ||
+ test_failed $LINENO $i
+ rm -f foo baz || framework_failure
+ "${TARLZ}" -q -n$i -xf "${test3dir_lz}" || test_failed $LINENO $i
+ "${TARLZ}" -q -n$i -df "${test3dir_lz}" --ignore-ids ||
+ test_failed $LINENO $i
+ "${TARLZ}" -q -n$i -df "${test3dir_lz}" dir --ignore-ids ||
+ test_failed $LINENO $i
+ "${TARLZ}" -n$i -df "${test3_lz}" --ignore-ids -C dir ||
+ test_failed $LINENO $i
+ rm -rf dir || framework_failure
+ done
+ cmp out0 out2 || test_failed $LINENO
+ cmp out0 out6 || test_failed $LINENO
+ rm -f out0 out2 out6 || framework_failure
+fi
+rm -f out.tar aout.tar foo bar baz || framework_failure
+
+printf "\ntesting --delete..."
+
+# test --delete
+cat "${in}" > out.tar || framework_failure # invalid tar
+"${TARLZ}" -q -f out.tar --delete foo
+[ $? = 2 ] || test_failed $LINENO
+rm -f out.tar || framework_failure
+cat "${in}" > out.tar.lz || framework_failure # invalid tar.lz
+"${TARLZ}" -q -f out.tar.lz --delete foo
+[ $? = 2 ] || test_failed $LINENO
+cat "${in_lz}" > out.tar.lz || framework_failure # invalid tar.lz
+"${TARLZ}" -q -f out.tar.lz --delete foo
+[ $? = 2 ] || test_failed $LINENO
+rm -f out.tar.lz || framework_failure
+
+for e in "" .lz ; do
+ "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -f out.tar$e --delete test.txt || test_failed $LINENO $e
+ cmp "${test3}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -f out.tar$e --delete || test_failed $LINENO $e # delete nothing
+ cmp "${test3}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -q -f out.tar$e --delete nx_file
+ [ $? = 1 ] || test_failed $LINENO $e
+ cmp "${test3}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -q -f out.tar$e --delete test.txt || test_failed $LINENO $e
+ cmp "${test3dir}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -q -f out.tar$e --delete dir || test_failed $LINENO $e
+ cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -q -f out.tar$e --del dir/foo dir/bar dir/baz || test_failed $LINENO $e
+ cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -A "${in_tar}"$e "${test3dir}"$e > out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -q -f out.tar$e --del dir/foo dir/baz || test_failed $LINENO $e
+ cmp "${in_tar}"$e out.tar$e > /dev/null && test_failed $LINENO $e
+ "${TARLZ}" -q -f out.tar$e --del dir/bar || test_failed $LINENO $e
+ cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -f out.tar$e --delete foo bar baz || test_failed $LINENO $e
+ cmp "${in_tar}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -f out.tar$e --del test.txt foo bar baz || test_failed $LINENO $e
+ cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e
+ for i in test.txt foo bar baz ; do
+ "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i"
+ done
+ cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e
+ for i in baz bar foo test.txt ; do
+ "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i"
+ done
+ cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -A "${in_tar}"$e "${test3}"$e > out.tar$e || test_failed $LINENO $e
+ for i in foo bar test.txt baz ; do
+ "${TARLZ}" -f out.tar$e --delete $i || test_failed $LINENO "$e $i"
+ done
+ cmp "${eoa}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -A "${in_tar}"$e "${t155}"$e "${test3}"$e > out.tar$e ||
+ test_failed $LINENO $e
+ "${TARLZ}" -f out.tar$e --del baz foo test.txt bar || test_failed $LINENO $e
+ cmp "${t155}"$e out.tar$e || test_failed $LINENO $e
+ "${TARLZ}" -f out.tar$e --delete link || test_failed $LINENO $e
+ "${TARLZ}" -q -tf out.tar$e || test_failed $LINENO $e
+ cmp "${t155}"$e out.tar$e > /dev/null && test_failed $LINENO $e
+ rm -f out.tar$e || framework_failure
+done
+
+# test --delete individual member after collective member
+cat cfoo > foo || framework_failure
+cat cbar > bar || framework_failure
+cat cbaz > baz || framework_failure
+cat "${in}" > test.txt || framework_failure
+"${TARLZ}" -0 -cf out.tar.lz foo bar baz --asolid || test_failed $LINENO
+"${TARLZ}" -0 -rf out.tar.lz test.txt || test_failed $LINENO
+rm -f foo bar baz test.txt || framework_failure
+for i in foo bar baz ; do
+ "${TARLZ}" -q -f out.tar.lz --delete $i
+ [ $? = 2 ] || test_failed $LINENO $i
+done
+"${TARLZ}" -f out.tar.lz --delete test.txt || test_failed $LINENO
+"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+[ ! -e test.txt ] || test_failed $LINENO
+rm -f out.tar.lz foo bar baz test.txt || framework_failure
+
+# test --delete with empty lzip member, global header
+for i in 1 2 3 4 5 6 ; do
+ cat "${testdir}"/test3_em${i}.tar.lz > out.tar.lz || framework_failure
+ for j in foo bar baz ; do
+ "${TARLZ}" -f out.tar.lz --delete $j || test_failed $LINENO "$i $j"
+ done
+ rm -f out.tar.lz || framework_failure
+done
+cat "${testdir}"/test3_gh5.tar.lz > out.tar.lz || framework_failure
+for i in foo bar baz ; do
+ "${TARLZ}" -f out.tar.lz --delete $i || test_failed $LINENO $i
+done
+rm -f out.tar.lz || framework_failure
+for i in 1 2 3 4 ; do
+ cat "${testdir}"/test3_gh${i}.tar > out.tar || framework_failure
+ for j in foo bar baz ; do
+ "${TARLZ}" -f out.tar --delete $j || test_failed $LINENO "$i $j"
+ done
+ rm -f out.tar || framework_failure
+done
+
+printf "\ntesting --dereference..."
+
+# test --dereference
+# First probe whether both hard links and symbolic links can be created here.
+# ln_works is set to 'yes' only if both succeed; it gates every link-dependent
+# test from this point to the end of the script.
+touch dummy_file || framework_failure
+if ln dummy_file dummy_link 2> /dev/null &&
+ ln -s dummy_file dummy_slink 2> /dev/null ; then
+ ln_works=yes
+else
+ printf "\nwarning: skipping link test: 'ln' does not work on your system.\n"
+fi
+rm -f dummy_slink dummy_link dummy_file || framework_failure
+#
+if [ "${ln_works}" = yes ] ; then
+ # dir_link is a symlink to a directory holding foo, bar and baz.
+ mkdir dir || framework_failure
+ cat cfoo > dir/foo || framework_failure
+ cat cbar > dir/bar || framework_failure
+ cat cbaz > dir/baz || framework_failure
+ ln -s dir dir_link || framework_failure
+ # out1..out3 archive the link without -h; hout1..hout3 archive it with -h.
+ # Each trio covers: compressed, uncompressed (--un), and compressed with -n0.
+ "${TARLZ}" -0 -c dir_link > out1 || test_failed $LINENO
+ is_compressed out1 || test_failed $LINENO
+ "${TARLZ}" --un -c dir_link > out2 || test_failed $LINENO
+ is_uncompressed out2 || test_failed $LINENO
+ "${TARLZ}" -0 -n0 -c dir_link > out3 || test_failed $LINENO
+ "${TARLZ}" -0 -h -c dir_link > hout1 || test_failed $LINENO
+ "${TARLZ}" --un -h -c dir_link > hout2 || test_failed $LINENO
+ "${TARLZ}" -0 -n0 -h -c dir_link > hout3 || test_failed $LINENO
+ rm -rf dir dir_link || framework_failure
+ for i in 1 2 3 ; do
+ # Without -h the extracted dir_link must be a symlink, and the archive is
+ # not expected to list members below it; if it does, cyg_symlink (defined
+ # outside this section) is called with the line number.
+ "${TARLZ}" -xf out$i --exclude='dir_link/*' dir_link ||
+ test_failed $LINENO $i # Cygwin stores dir_link/*
+ [ -h dir_link ] || test_failed $LINENO $i
+ "${TARLZ}" -q -tf out$i dir_link/foo && cyg_symlink $LINENO $i
+ "${TARLZ}" -q -tf out$i dir_link/bar && cyg_symlink $LINENO $i
+ "${TARLZ}" -q -tf out$i dir_link/baz && cyg_symlink $LINENO $i
+ rm -rf dir_link out$i || framework_failure
+ # With -h the extracted dir_link must be a real directory containing
+ # byte-identical copies of the three files.
+ "${TARLZ}" -xf hout$i || test_failed $LINENO $i
+ [ -d dir_link ] || test_failed $LINENO $i
+ cmp cfoo dir_link/foo || test_failed $LINENO $i
+ cmp cbar dir_link/bar || test_failed $LINENO $i
+ cmp cbaz dir_link/baz || test_failed $LINENO $i
+ rm -rf dir_link hout$i || framework_failure
+ done
+fi
+
+printf "\ntesting --append..."
+
+# test --append compressed
+# out.tar.lz (foo bar baz created in one step) is the reference archive.
+# Every append scenario below must leave aout.tar.lz byte-identical to it.
+cat cfoo > foo || framework_failure
+cat cbar > bar || framework_failure
+cat cbaz > baz || framework_failure
+"${TARLZ}" -0 -cf out.tar.lz foo bar baz --out-slots=1024 || test_failed $LINENO
+# With --no-solid, creating foo and then appending bar baz must give the same
+# bytes as creating all three at once.
+"${TARLZ}" -0 -cf nout.tar.lz foo bar baz --no-solid || test_failed $LINENO
+"${TARLZ}" -0 -cf aout.tar.lz foo || test_failed $LINENO
+"${TARLZ}" -0 -rf aout.tar.lz bar baz --no-solid || test_failed $LINENO
+cmp nout.tar.lz aout.tar.lz || test_failed $LINENO
+rm -f nout.tar.lz aout.tar.lz || framework_failure
+touch aout.tar.lz || framework_failure # append to empty file
+"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -0 -rf aout.tar.lz || test_failed $LINENO # append nothing
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+# -C with no file names after it: must succeed and change nothing
+"${TARLZ}" -0 -rf aout.tar.lz -C nx_dir || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+# appending a nonexistent file: exit status 1, archive unchanged
+"${TARLZ}" -0 -q -rf aout.tar.lz nx_file
+[ $? = 1 ] || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+# appending the archive to itself: must succeed and change nothing
+"${TARLZ}" -0 -q -rf aout.tar.lz aout.tar.lz || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+"${TARLZ}" -0 -r foo bar baz > aout.tar.lz || test_failed $LINENO # to stdout
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+# --un (uncompressed) requested for a .lz archive: exit status 1, unchanged
+"${TARLZ}" --un -q -rf aout.tar.lz foo bar baz # contradictory ext
+[ $? = 1 ] || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+cat "${eoa_lz}" > aout.tar.lz || framework_failure # append to empty archive
+"${TARLZ}" -0 -rf aout.tar.lz foo bar baz || test_failed $LINENO
+cmp out.tar.lz aout.tar.lz || test_failed $LINENO
+rm -f out.tar.lz aout.tar.lz || framework_failure
+
+# test --append --uncompressed
+# Same scenarios as the compressed case, with out.tar as the reference.
+"${TARLZ}" -cf out.tar foo bar baz || test_failed $LINENO
+"${TARLZ}" -cf aout.tar foo || test_failed $LINENO
+# foo is already in aout.tar; --exclude foo keeps it from being added twice
+"${TARLZ}" -rf aout.tar foo bar baz --exclude foo || test_failed $LINENO
+is_uncompressed out.tar || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+rm -f aout.tar || framework_failure
+# Compression level given for a '.tar' name: exit status 1, and the empty
+# file must stay empty (compared against the zero-length file 'empty').
+touch aout.tar empty || framework_failure # contradictory ext empty file
+"${TARLZ}" -0 -q -rf aout.tar foo bar baz
+[ $? = 1 ] || test_failed $LINENO
+cmp aout.tar empty || test_failed $LINENO
+rm -f aout.tar empty || framework_failure
+touch aout.tar || framework_failure # append to empty file
+"${TARLZ}" -rf aout.tar foo bar baz || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" -rf aout.tar || test_failed $LINENO # append nothing
+cmp out.tar aout.tar || test_failed $LINENO
+# -C with no file names after it: must succeed and change nothing
+"${TARLZ}" -rf aout.tar -C nx_dir || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+# appending a nonexistent file: exit status 1, archive unchanged
+"${TARLZ}" -q -rf aout.tar nx_file
+[ $? = 1 ] || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+# appending the archive to itself: must succeed and change nothing
+"${TARLZ}" -q -rf aout.tar aout.tar || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" --un -r foo bar baz > aout.tar || test_failed $LINENO # to stdout
+cmp out.tar aout.tar || test_failed $LINENO
+"${TARLZ}" -0 -q -rf aout.tar foo bar baz # contradictory ext
+[ $? = 1 ] || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+cat "${eoa}" > aout.tar || framework_failure # append to empty archive
+"${TARLZ}" -rf aout.tar foo bar baz || test_failed $LINENO
+cmp out.tar aout.tar || test_failed $LINENO
+rm -f out.tar aout.tar || framework_failure
+
+# test --append to solid archive
+# Appending to an archive created with --solid must be refused with exit
+# status 2, whatever option is passed, and must leave the archive untouched
+# (aout.tar.lz is the pristine copy used for the comparison).
+"${TARLZ}" --solid -q -0 -cf out.tar.lz "${in}" foo bar || test_failed $LINENO
+"${TARLZ}" -q -tf out.tar.lz || test_failed $LINENO # compressed seekable
+cat out.tar.lz > aout.tar.lz || framework_failure
+for i in --asolid --bsolid --dsolid --solid -0 ; do
+ "${TARLZ}" $i -q -rf out.tar.lz baz
+ [ $? = 2 ] || test_failed $LINENO $i
+ cmp out.tar.lz aout.tar.lz || test_failed $LINENO $i
+done
+rm -f out.tar.lz aout.tar.lz || framework_failure
+# Archives created with any of the other options ($i) must accept an append
+# made with any option ($j), and all three files must extract intact.
+for i in --asolid --bsolid --dsolid -0 ; do
+ for j in --asolid --bsolid --dsolid --solid -0 ; do
+ "${TARLZ}" $i -0 -cf out.tar.lz foo || test_failed $LINENO "$i $j"
+ "${TARLZ}" $j -0 -rf out.tar.lz bar baz || test_failed $LINENO "$i $j"
+ # remove the inputs; the extraction below recreates them for the next pass
+ rm -f foo bar baz || framework_failure
+ "${TARLZ}" -xf out.tar.lz || test_failed $LINENO "$i $j"
+ cmp cfoo foo || test_failed $LINENO "$i $j"
+ cmp cbar bar || test_failed $LINENO "$i $j"
+ cmp cbaz baz || test_failed $LINENO "$i $j"
+ rm -f out.tar.lz || framework_failure
+ done
+done
+rm -f foo bar baz || framework_failure
+
+printf "\ntesting dirs and links..."
+
+# test -c -d -x on directories and links
+# An empty directory must survive a create/extract round trip, first through
+# a compressed archive and then through an uncompressed one.
+mkdir dir1 || framework_failure
+"${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO
+rmdir dir1 || framework_failure
+"${TARLZ}" -xf out.tar.lz || test_failed $LINENO
+[ -d dir1 ] || test_failed $LINENO
+# NOTE(review): this rmdir is not checked, presumably so that a failed
+# extraction (already reported just above) does not also abort the suite.
+rmdir dir1
+rm -f out.tar.lz || framework_failure
+mkdir dir1 || framework_failure
+"${TARLZ}" -cf out.tar dir1 || test_failed $LINENO
+is_uncompressed out.tar || test_failed $LINENO
+rmdir dir1 || framework_failure
+"${TARLZ}" -xf out.tar || test_failed $LINENO
+[ -d dir1 ] || test_failed $LINENO
+# unchecked for the same presumed reason as above
+rmdir dir1
+rm -f out.tar || framework_failure
+
+# Hard links, symlinks and long names inside a directory tree.
+# Runs only when the earlier probe set ln_works=yes.
+if [ "${ln_works}" = yes ] ; then
+ # Names at and beyond the length their identifiers announce
+ # (100-byte name, 100-byte path, path longer than 100 bytes).
+ name_100=name_100_bytes_long_nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
+ path_100=dir1/dir2/dir3/path_100_bytes_long_nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
+ path_106=dir1/dir2/dir3/path_longer_than_100_bytes_nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn
+ mkdir dir1 || framework_failure
+ mkdir dir1/dir2 || framework_failure
+ mkdir dir1/dir2/dir3 || framework_failure
+ cat "${in}" > dir1/dir2/dir3/in || framework_failure
+ # three hard links to 'in', one directory symlink and two file symlinks
+ ln dir1/dir2/dir3/in dir1/dir2/dir3/"${name_100}" || framework_failure
+ ln dir1/dir2/dir3/in "${path_100}" || framework_failure
+ ln dir1/dir2/dir3/in "${path_106}" || framework_failure
+ ln -s dir2/ dir1/dir2_link || framework_failure
+ ln -s in dir1/dir2/dir3/link || framework_failure
+ ln -s "${name_100}" dir1/dir2/dir3/link_100 || framework_failure
+ # archive, compare against the tree, delete the tree, extract, compare again
+ "${TARLZ}" -0 -cf out.tar.lz dir1 || test_failed $LINENO
+ "${TARLZ}" -df out.tar.lz || test_failed $LINENO
+ rm -rf dir1 || framework_failure
+ "${TARLZ}" -xf out.tar.lz || test_failed $LINENO
+ "${TARLZ}" -df out.tar.lz || test_failed $LINENO
+ # every name, link and path must give access to the original contents
+ cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO
+ cmp "${in}" dir1/dir2_link/dir3/in || test_failed $LINENO
+ cmp "${in}" dir1/dir2/dir3/"${name_100}" || test_failed $LINENO
+ cmp "${in}" "${path_100}" || test_failed $LINENO
+ cmp "${in}" "${path_106}" || test_failed $LINENO
+ cmp "${in}" dir1/dir2/dir3/link || test_failed $LINENO
+ cmp "${in}" dir1/dir2/dir3/link_100 || test_failed $LINENO
+ # After removing 'in', the symlink 'link' -> in dangles, so cmp must fail;
+ # link_100 points to a hard link of the same data and must still work.
+ rm -f dir1/dir2/dir3/in || framework_failure
+ cmp "${in}" dir1/dir2/dir3/link 2> /dev/null && test_failed $LINENO
+ cmp "${in}" dir1/dir2/dir3/link_100 || test_failed $LINENO
+ # extracting over the existing tree must restore 'in'
+ "${TARLZ}" -xf out.tar.lz || test_failed $LINENO
+ rm -f out.tar.lz || framework_failure
+ cmp "${in}" dir1/dir2/dir3/in || test_failed $LINENO
+ cmp "${in}" dir1/dir2/dir3/link || test_failed $LINENO
+ # Archive the tree through a '../tmp/dir1' path and extract it from a pipe;
+ # the copy is expected to appear as tmp/dir1 below the current directory.
+ "${TARLZ}" -0 -q -c ../tmp/dir1 | "${TARLZ}" -x || test_failed $LINENO
+ diff -ru tmp/dir1 dir1 || test_failed $LINENO
+ rm -rf tmp dir1 || framework_failure
+ # test -c -d -x on dangling (broken) symlinks with trailing slashes
+ "${TARLZ}" -xf "${testdir}"/ts_in_link.tar.lz || test_failed $LINENO
+ "${TARLZ}" -df "${testdir}"/ts_in_link.tar.lz --ignore-ids ||
+ test_failed $LINENO
+ "${TARLZ}" -0 -cf out.tar.lz link1 link2 link3 link4 || test_failed $LINENO
+ "${TARLZ}" -df out.tar.lz || test_failed $LINENO
+ rm -f out.tar.lz link1 link2 link3 link4 || framework_failure
+fi
+
+printf "\ntesting long names..."
+
+# The t155 fixtures must list cleanly in both uncompressed and compressed form.
+"${TARLZ}" -q -tf "${t155}" || test_failed $LINENO
+"${TARLZ}" -q -tf "${t155_lz}" || test_failed $LINENO
+if [ "${ln_works}" = yes ] ; then
+ # Both forms must extract to identical trees (dir1 vs dir2), and the tree
+ # must survive being re-archived from dir2 and extracted again.
+ mkdir dir1 || framework_failure
+ "${TARLZ}" -C dir1 -xf "${t155}" || test_failed $LINENO
+ mkdir dir2 || framework_failure
+ "${TARLZ}" -C dir2 -xf "${t155_lz}" || test_failed $LINENO
+ diff -ru dir1 dir2 || test_failed $LINENO
+ "${TARLZ}" -cf out.tar.lz dir2 || test_failed $LINENO
+ rm -rf dir2 || framework_failure
+ "${TARLZ}" -xf out.tar.lz || test_failed $LINENO
+ diff -ru dir1 dir2 || test_failed $LINENO
+ # rmdir succeeds only on an empty directory, so success here means that
+ # nothing was extracted and is reported as a failure
+ rmdir dir2 2> /dev/null && test_failed $LINENO
+ rmdir dir1 2> /dev/null && test_failed $LINENO
+ rm -rf out.tar.lz dir2 dir1 || framework_failure
+fi
+
+# Owner and group names of 32 characters: the verbose listing must show them
+# as owner/group, never run together and never with the group shown twice.
+"${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \
+ -e very_long_owner_name_of_32_chars/very_long_group_name_of_32_chars ||
+ test_failed $LINENO
+"${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \
+ -e very_long_owner_name_of_32_charsvery_long_group_name_of_32_chars &&
+ test_failed $LINENO
+"${TARLZ}" -tvf "${testdir}"/ug32chars.tar.lz | grep -q \
+ -e very_long_group_name_of_32_chars/very_long_group_name_of_32_chars &&
+ test_failed $LINENO
+"${TARLZ}" -xf "${testdir}"/ug32chars.tar.lz || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+rm -f foo || framework_failure
+# the ug32767 fixture must be listed with '32767/32767' and extract foo intact
+"${TARLZ}" -tvf "${testdir}"/ug32767.tar.lz | grep -q -e 32767/32767 ||
+ test_failed $LINENO
+"${TARLZ}" -xf "${testdir}"/ug32767.tar.lz || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+rm -f foo || framework_failure
+
+printf "\ntesting --compress..."
+
+# Inputs: out.tar (five members, test.txt stored twice) and out3.tar (foo bar
+# baz), plus working copies outz.tar and out3z.tar that -z compresses by name.
+cat cfoo > foo || framework_failure
+cat cbar > bar || framework_failure
+cat cbaz > baz || framework_failure
+cat "${in}" > test.txt || framework_failure
+"${TARLZ}" -cf out.tar test.txt foo bar baz test.txt || test_failed $LINENO
+"${TARLZ}" -cf out3.tar foo bar baz || test_failed $LINENO
+cat out.tar > outz.tar || framework_failure
+cat out3.tar > out3z.tar || framework_failure
+#
+# -z writes <name>.lz next to each input; both results must be listable.
+"${TARLZ}" -0 -z outz.tar out3z.tar || test_failed $LINENO
+"${TARLZ}" -q -tf outz.tar.lz || test_failed $LINENO
+"${TARLZ}" -q -tf out3z.tar.lz || test_failed $LINENO
+cat outz.tar.lz > out || test_failed $LINENO
+cat out3z.tar.lz > out3 || test_failed $LINENO
+rm -f out3z.tar.lz || framework_failure
+# Second run: outz.tar.lz already exists, so exit status 1 is expected. The
+# existing file must be left unchanged and the missing out3z.tar.lz must
+# still be produced, identical to the first run's output.
+"${TARLZ}" -q -0 -z outz.tar out3z.tar # outz.tar.lz exists
+[ $? = 1 ] || test_failed $LINENO
+cmp out outz.tar.lz || test_failed $LINENO
+cmp out3 out3z.tar.lz || test_failed $LINENO
+if [ "${ln_works}" = yes ] ; then
+ # compressing through a symlink must give the same bytes as the real file
+ ln -s outz.tar loutz.tar || framework_failure
+ "${TARLZ}" -0 -z loutz.tar || test_failed $LINENO
+ cmp loutz.tar.lz outz.tar.lz || test_failed $LINENO
+ rm -f loutz.tar.lz loutz.tar || framework_failure
+fi
+rm -f out out3 outz.tar.lz out3z.tar.lz || framework_failure
+#
+# For --solid and --no-solid, compressing an existing tar with -z must give
+# exactly the bytes produced by creating the archive directly with -n0.
+for i in --solid --no-solid ; do
+ "${TARLZ}" -0 -n0 $i -cf out.tar.lz test.txt foo bar baz test.txt || test_failed $LINENO $i
+ "${TARLZ}" -0 -z -o - $i out.tar | cmp out.tar.lz - || test_failed $LINENO $i
+ "${TARLZ}" -0 -n0 $i -cf out3.tar.lz foo bar baz || test_failed $LINENO $i
+ "${TARLZ}" -0 -z -o - $i out3.tar | cmp out3.tar.lz - || test_failed $LINENO $i
+ "${TARLZ}" -0 -z $i outz.tar out3z.tar || test_failed $LINENO $i
+ cmp out.tar.lz outz.tar.lz || test_failed $LINENO $i
+ cmp out3.tar.lz out3z.tar.lz || test_failed $LINENO $i
+ rm -f outz.tar.lz out3z.tar.lz || framework_failure
+done
+#
+# Same equivalence for --bsolid with -B8KiB, writing to stdout, to a named
+# output file, to <name>.lz, and to an output path in nested directories.
+"${TARLZ}" -0 -B8KiB -n0 --bsolid -cf out.tar.lz test.txt foo bar baz test.txt || test_failed $LINENO
+"${TARLZ}" -0 -B8KiB -z -o - --bsolid out.tar | cmp out.tar.lz - || test_failed $LINENO
+"${TARLZ}" -0 -B8KiB -z -o out --bsolid out.tar || test_failed $LINENO
+cmp out.tar.lz out || test_failed $LINENO
+"${TARLZ}" -0 -B8KiB -z --bsolid outz.tar || test_failed $LINENO
+cmp out.tar.lz outz.tar.lz || test_failed $LINENO
+rm -f out outz.tar.lz || framework_failure
+# NOTE(review): a/b/c is not created in the visible part of this script, so
+# tarlz is presumably expected to create the missing directories itself.
+"${TARLZ}" -0 -B8KiB -z -o a/b/c/out --bsolid out.tar || test_failed $LINENO
+cmp out.tar.lz a/b/c/out || test_failed $LINENO
+rm -rf a || framework_failure
+#
+# With -z, each of --asolid, --bsolid and --dsolid must reproduce the archive
+# that was created directly with --asolid.
+"${TARLZ}" -0 -n0 --asolid -cf out.tar.lz test.txt foo bar baz test.txt || test_failed $LINENO
+"${TARLZ}" -0 -n0 --asolid -cf out3.tar.lz foo bar baz || test_failed $LINENO
+for i in --asolid --bsolid --dsolid ; do
+ cat out.tar | "${TARLZ}" -0 -z $i | cmp out.tar.lz - || test_failed $LINENO $i
+ "${TARLZ}" -0 -z -o out $i out.tar || test_failed $LINENO $i
+ cmp out.tar.lz out || test_failed $LINENO $i
+ "${TARLZ}" -0 -z $i outz.tar out3z.tar || test_failed $LINENO $i
+ cmp out.tar.lz outz.tar.lz || test_failed $LINENO $i
+ cmp out3.tar.lz out3z.tar.lz || test_failed $LINENO $i
+ rm -f out outz.tar.lz out3z.tar.lz || framework_failure
+done
+# concatenate and compress
+# (the result must equal out3.tar.lz, the --asolid archive of foo bar baz)
+"${TARLZ}" -cf foo.tar foo || test_failed $LINENO
+"${TARLZ}" -cf bar.tar bar || test_failed $LINENO
+"${TARLZ}" -cf baz.tar baz || test_failed $LINENO
+"${TARLZ}" -A foo.tar bar.tar baz.tar | "${TARLZ}" -0 -z -o foobarbaz.tar.lz ||
+ test_failed $LINENO
+cmp out3.tar.lz foobarbaz.tar.lz || test_failed $LINENO
+"${TARLZ}" -A foo.tar bar.tar baz.tar | "${TARLZ}" -0 -z > foobarbaz.tar.lz ||
+ test_failed $LINENO
+cmp out3.tar.lz foobarbaz.tar.lz || test_failed $LINENO
+# compress and concatenate
+# (the result must equal a --no-solid archive of foo bar baz)
+"${TARLZ}" -0 -z foo.tar bar.tar baz.tar || test_failed $LINENO
+"${TARLZ}" -A foo.tar.lz bar.tar.lz baz.tar.lz > foobarbaz.tar.lz ||
+ test_failed $LINENO
+"${TARLZ}" -0 -n0 --no-solid -c foo bar baz | cmp foobarbaz.tar.lz - ||
+ test_failed $LINENO
+rm -f foo bar baz test.txt out.tar.lz out.tar outz.tar foobarbaz.tar.lz \
+ out3.tar out3.tar.lz out3z.tar foo.tar bar.tar baz.tar \
+ foo.tar.lz bar.tar.lz baz.tar.lz || framework_failure
+
+printf "\ntesting bad input..."
+
+# test --extract ".."
+# The extraction runs inside a fresh subdirectory, so a member that escaped
+# through '..' would show up as ../dir. Each fixture must extract with exit
+# status 0 and create 'dir' neither in the parent nor in the current directory.
+mkdir dir1 || framework_failure
+cd dir1 || framework_failure
+for i in 0 2 ; do # try serial and parallel decoders
+ "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot1.tar.lz || test_failed $LINENO $i
+ [ ! -e ../dir ] || test_failed $LINENO $i
+ "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot2.tar.lz || test_failed $LINENO $i
+ [ ! -e ../dir ] || test_failed $LINENO $i
+ "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot3.tar.lz || test_failed $LINENO $i
+ [ ! -e dir ] || test_failed $LINENO $i
+ "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot4.tar.lz || test_failed $LINENO $i
+ [ ! -e dir ] || test_failed $LINENO $i
+ "${TARLZ}" -q -n$i -xf "${testdir}"/dotdot5.tar.lz || test_failed $LINENO $i
+ [ ! -e dir ] || test_failed $LINENO $i
+done
+cd .. || framework_failure
+rm -rf dir1 || framework_failure
+
+# test --list and --extract truncated tar
+# truncated.tar holds only the first 1000 bytes of ${in_tar}. Listing and
+# extracting it must both end with exit status 2, and test.txt must not be
+# created by the extraction.
+dd if="${in_tar}" of=truncated.tar bs=1000 count=1 2> /dev/null
+"${TARLZ}" -q -tf truncated.tar > /dev/null
+[ $? = 2 ] || test_failed $LINENO
+"${TARLZ}" -q -xf truncated.tar
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e test.txt ] || test_failed $LINENO
+rm -f truncated.tar || framework_failure
+
+# test --delete with split 'bar' tar member
+# For each test3_sm fixture (worked on as the copy out.tar.lz):
+#  - deleting bar or baz must fail with exit status 2 and leave the archive
+#    byte-identical to the fixture;
+#  - deleting foo must also end with exit status 2, but afterwards the archive
+#    must extract bar and baz intact and must not produce foo.
+for i in 1 2 3 4 ; do
+ cat "${testdir}"/test3_sm${i}.tar.lz > out.tar.lz || framework_failure
+ for j in bar baz ; do
+ "${TARLZ}" -q -f out.tar.lz --delete $j
+ [ $? = 2 ] || test_failed $LINENO "$i $j"
+ done
+ cmp "${testdir}"/test3_sm${i}.tar.lz out.tar.lz || test_failed $LINENO $i
+ "${TARLZ}" -q -f out.tar.lz --delete foo
+ [ $? = 2 ] || test_failed $LINENO $i
+ "${TARLZ}" -xf out.tar.lz || test_failed $LINENO $i
+ [ ! -e foo ] || test_failed $LINENO $i
+ cmp cbar bar || test_failed $LINENO $i
+ cmp cbaz baz || test_failed $LINENO $i
+ rm -f out.tar.lz foo bar baz || framework_failure
+done
+
+# test --list and --extract format violations
+# The t155_fv* fixtures must be rejected with exit status 2 by default and
+# accepted with --permissive. When links work, a permissive extraction must
+# give the same tree as the reference extraction of ${t155} kept in dir1.
+if [ "${ln_works}" = yes ] ; then
+ mkdir dir1 || framework_failure
+ "${TARLZ}" -C dir1 -xf "${t155}" || test_failed $LINENO
+fi
+# uncompressed fixtures 1-3
+for i in 1 2 3 ; do
+ "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar
+ [ $? = 2 ] || test_failed $LINENO $i
+ "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar --permissive ||
+ test_failed $LINENO $i
+ if [ "${ln_works}" = yes ] ; then
+ mkdir dir2 || framework_failure
+ "${TARLZ}" -C dir2 -xf "${testdir}"/t155_fv${i}.tar --permissive ||
+ test_failed $LINENO $i
+ diff -ru dir1 dir2 || test_failed $LINENO $i
+ rm -rf dir2 || framework_failure
+ fi
+done
+# compressed fixtures 1-6, extracted with -n4
+for i in 1 2 3 4 5 6 ; do
+ "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar.lz
+ [ $? = 2 ] || test_failed $LINENO $i
+ "${TARLZ}" -q -tf "${testdir}"/t155_fv${i}.tar.lz --permissive ||
+ test_failed $LINENO $i
+ if [ "${ln_works}" = yes ] ; then
+ mkdir dir2 || framework_failure
+ "${TARLZ}" -n4 -C dir2 -xf "${testdir}"/t155_fv${i}.tar.lz --permissive ||
+ test_failed $LINENO $i
+ diff -ru dir1 dir2 || test_failed $LINENO $i
+ rm -rf dir2 || framework_failure
+ fi
+done
+if [ "${ln_works}" = yes ] ; then rm -rf dir1 || framework_failure ; fi
+
+# The test3_nn fixtures must list and extract successfully with -n0 and -n4.
+# After extraction foo and baz must match their references and bar must not
+# exist. The -d comparison runs only when d_works (set outside this section)
+# is 'yes'.
+for i in "${testdir}"/test3_nn.tar "${testdir}"/test3_nn.tar.lz ; do
+ "${TARLZ}" -q -n0 -tf "$i" || test_failed $LINENO "$i"
+ "${TARLZ}" -q -n4 -tf "$i" || test_failed $LINENO "$i"
+ "${TARLZ}" -q -n0 -xf "$i" || test_failed $LINENO "$i"
+ if [ "${d_works}" = yes ] ; then
+ "${TARLZ}" -n0 -df "$i" --ignore-ids || test_failed $LINENO "$i"
+ fi
+ cmp cfoo foo || test_failed $LINENO "$i"
+ [ ! -e bar ] || test_failed $LINENO "$i"
+ cmp cbaz baz || test_failed $LINENO "$i"
+ rm -f foo bar baz || framework_failure
+ # same sequence with -n4
+ "${TARLZ}" -q -n4 -xf "$i" || test_failed $LINENO "$i"
+ if [ "${d_works}" = yes ] ; then
+ "${TARLZ}" -n4 -df "$i" --ignore-ids || test_failed $LINENO "$i"
+ fi
+ cmp cfoo foo || test_failed $LINENO "$i"
+ [ ! -e bar ] || test_failed $LINENO "$i"
+ cmp cbaz baz || test_failed $LINENO "$i"
+ rm -f foo bar baz || framework_failure
+done
+
+printf "\ntesting --keep-damaged..."
+
+# test --extract and --keep-damaged compressed
+# Every extraction in this section must end with exit status 2. The checks
+# after each run record which members are expected on disk: a damaged member
+# is absent by default and present with --keep-damaged.
+rm -f test.txt || framework_failure
+for i in "${inbad1}" "${inbad2}" ; do
+ "${TARLZ}" -q -xf "${i}.tar.lz"
+ [ $? = 2 ] || test_failed $LINENO "$i"
+ [ ! -e test.txt ] || test_failed $LINENO "$i"
+ rm -f test.txt || framework_failure
+ # with --keep-damaged the kept test.txt must equal the reference file "$i"
+ "${TARLZ}" -q -n0 -xf "${i}.tar.lz" --keep-damaged
+ [ $? = 2 ] || test_failed $LINENO "$i"
+ [ -e test.txt ] || test_failed $LINENO "$i"
+ cmp "$i" test.txt 2> /dev/null || test_failed $LINENO "$i"
+ rm -f test.txt || framework_failure
+done
+#
+# bad1_lz: foo is lost, bar and baz are extracted intact
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -q -n0 -xf "${bad1_lz}"
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e foo ] || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+# bad2_lz: same expectations as bad1_lz
+"${TARLZ}" -q -n0 -xf "${bad2_lz}"
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e foo ] || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+# bad3_lz: bar is lost by default ...
+"${TARLZ}" -q -n0 -xf "${bad3_lz}"
+[ $? = 2 ] || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+# ... and is kept, equal to cbar, with --keep-damaged
+"${TARLZ}" -q -n0 -xf "${bad3_lz}" --keep-damaged
+[ $? = 2 ] || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar 2> /dev/null || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+# bad4_lz: foo and bar are lost by default ...
+"${TARLZ}" -q -n0 -xf "${bad4_lz}"
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e foo ] || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+# ... and only bar is recovered with --keep-damaged
+"${TARLZ}" -q -n0 -xf "${bad4_lz}" --keep-damaged
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e foo ] || test_failed $LINENO
+cmp cbar bar 2> /dev/null || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+# bad5_lz: foo and bar are lost by default ...
+"${TARLZ}" -q -n0 -xf "${bad5_lz}"
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e foo ] || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+# ... and only foo is recovered with --keep-damaged
+"${TARLZ}" -q -n0 -xf "${bad5_lz}" --keep-damaged
+[ $? = 2 ] || test_failed $LINENO
+cmp cfoo foo 2> /dev/null || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+# bad6_lz: exit status 2, yet all three members are extracted intact
+"${TARLZ}" -q -n0 -xf "${bad6_lz}"
+[ $? = 2 ] || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+
+# test --extract and --keep-damaged uncompressed
+# As in the compressed case, every extraction here must end with exit
+# status 2; the checks after each run record which members must exist.
+rm -f test.txt || framework_failure
+"${TARLZ}" -q -xf "${inbad1}.tar"
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e test.txt ] || test_failed $LINENO
+rm -f test.txt || framework_failure
+# with --keep-damaged the kept test.txt must equal the reference ${inbad1}
+"${TARLZ}" -q -xf "${inbad1}.tar" --keep-damaged
+[ $? = 2 ] || test_failed $LINENO
+[ -e test.txt ] || test_failed $LINENO
+cmp "${inbad1}" test.txt 2> /dev/null || test_failed $LINENO
+rm -f test.txt || framework_failure
+#
+# bad1: foo is lost
+rm -f foo bar baz || framework_failure
+"${TARLZ}" -q -xf "${bad1}"
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e foo ] || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+# bad2: bar is lost
+"${TARLZ}" -q -xf "${bad2}"
+[ $? = 2 ] || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+# bad3: baz is lost
+"${TARLZ}" -q -xf "${bad3}"
+[ $? = 2 ] || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+# bad4: bar and baz are lost
+"${TARLZ}" -q -xf "${bad4}"
+[ $? = 2 ] || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+rm -f foo bar baz || framework_failure
+# bad5: exit status 2, yet all three members are extracted intact
+"${TARLZ}" -q -xf "${bad5}"
+[ $? = 2 ] || test_failed $LINENO
+cmp cfoo foo || test_failed $LINENO
+cmp cbar bar || test_failed $LINENO
+cmp cbaz baz || test_failed $LINENO
+# the reference files cfoo, cbar and cbaz are removed here as well
+rm -f cfoo cbar cbaz foo bar baz || framework_failure
+#
+# tlzit1 / tlzit2: both must fail with exit status 2 without creating foo,
+# bar or baz. tlzit1 must not create test3.tar.lz either, while tlzit2 must
+# extract a test3.tar.lz identical to ${test3_lz}.
+# NOTE(review): the variable names suggest a tar archive stored inside a
+# .tar.lz; confirm where tlzit1 and tlzit2 are defined.
+rm -f test3.tar.lz || framework_failure
+"${TARLZ}" -q -xf "${tlzit1}"
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e foo ] || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+[ ! -e test3.tar.lz ] || test_failed $LINENO
+rm -f foo bar baz test3.tar.lz || framework_failure
+"${TARLZ}" -q -xf "${tlzit2}"
+[ $? = 2 ] || test_failed $LINENO
+[ ! -e foo ] || test_failed $LINENO
+[ ! -e bar ] || test_failed $LINENO
+[ ! -e baz ] || test_failed $LINENO
+cmp "${test3_lz}" test3.tar.lz || test_failed $LINENO
+rm -f foo bar baz test3.tar.lz || framework_failure
+
+# Final report. ${fail} is set outside this section (presumably by
+# test_failed) and becomes the exit status of the script. The scratch
+# directory 'tmp' under ${objdir} is removed only when no test failed, so it
+# stays available for inspection otherwise.
+echo
+if [ ${fail} = 0 ] ; then
+ echo "tests completed successfully."
+ cd "${objdir}" && rm -r tmp
+else
+ echo "tests failed."
+fi
+exit ${fail}
diff --git a/testsuite/dotdot1.tar.lz b/testsuite/dotdot1.tar.lz
new file mode 100644
index 0000000..9884d9f
--- /dev/null
+++ b/testsuite/dotdot1.tar.lz
Binary files differ
diff --git a/testsuite/dotdot2.tar.lz b/testsuite/dotdot2.tar.lz
new file mode 100644
index 0000000..a60b898
--- /dev/null
+++ b/testsuite/dotdot2.tar.lz
Binary files differ
diff --git a/testsuite/dotdot3.tar.lz b/testsuite/dotdot3.tar.lz
new file mode 100644
index 0000000..163fb5c
--- /dev/null
+++ b/testsuite/dotdot3.tar.lz
Binary files differ
diff --git a/testsuite/dotdot4.tar.lz b/testsuite/dotdot4.tar.lz
new file mode 100644
index 0000000..8c6a0ee
--- /dev/null
+++ b/testsuite/dotdot4.tar.lz
Binary files differ
diff --git a/testsuite/dotdot5.tar.lz b/testsuite/dotdot5.tar.lz
new file mode 100644
index 0000000..a62cd18
--- /dev/null
+++ b/testsuite/dotdot5.tar.lz
Binary files differ
diff --git a/testsuite/eoa_blocks.tar b/testsuite/eoa_blocks.tar
new file mode 100644
index 0000000..06d7405
--- /dev/null
+++ b/testsuite/eoa_blocks.tar
Binary files differ
diff --git a/testsuite/eoa_blocks.tar.lz b/testsuite/eoa_blocks.tar.lz
new file mode 100644
index 0000000..328273c
--- /dev/null
+++ b/testsuite/eoa_blocks.tar.lz
Binary files differ
diff --git a/testsuite/rbar b/testsuite/rbar
new file mode 100644
index 0000000..5716ca5
--- /dev/null
+++ b/testsuite/rbar
@@ -0,0 +1 @@
+bar
diff --git a/testsuite/rbaz b/testsuite/rbaz
new file mode 100644
index 0000000..7601807
--- /dev/null
+++ b/testsuite/rbaz
@@ -0,0 +1 @@
+baz
diff --git a/testsuite/rfoo b/testsuite/rfoo
new file mode 100644
index 0000000..257cc56
--- /dev/null
+++ b/testsuite/rfoo
@@ -0,0 +1 @@
+foo
diff --git a/testsuite/t155.tar b/testsuite/t155.tar
new file mode 100644
index 0000000..f2b8a4e
--- /dev/null
+++ b/testsuite/t155.tar
Binary files differ
diff --git a/testsuite/t155.tar.lz b/testsuite/t155.tar.lz
new file mode 100644
index 0000000..edc7f04
--- /dev/null
+++ b/testsuite/t155.tar.lz
Binary files differ
diff --git a/testsuite/t155_fv1.tar b/testsuite/t155_fv1.tar
new file mode 100644
index 0000000..1ef64c3
--- /dev/null
+++ b/testsuite/t155_fv1.tar
Binary files differ
diff --git a/testsuite/t155_fv1.tar.lz b/testsuite/t155_fv1.tar.lz
new file mode 100644
index 0000000..896925e
--- /dev/null
+++ b/testsuite/t155_fv1.tar.lz
Binary files differ
diff --git a/testsuite/t155_fv2.tar b/testsuite/t155_fv2.tar
new file mode 100644
index 0000000..f732b30
--- /dev/null
+++ b/testsuite/t155_fv2.tar
Binary files differ
diff --git a/testsuite/t155_fv2.tar.lz b/testsuite/t155_fv2.tar.lz
new file mode 100644
index 0000000..b380105
--- /dev/null
+++ b/testsuite/t155_fv2.tar.lz
Binary files differ
diff --git a/testsuite/t155_fv3.tar b/testsuite/t155_fv3.tar
new file mode 100644
index 0000000..fe5db13
--- /dev/null
+++ b/testsuite/t155_fv3.tar
Binary files differ
diff --git a/testsuite/t155_fv3.tar.lz b/testsuite/t155_fv3.tar.lz
new file mode 100644
index 0000000..aa24c0a
--- /dev/null
+++ b/testsuite/t155_fv3.tar.lz
Binary files differ
diff --git a/testsuite/t155_fv4.tar.lz b/testsuite/t155_fv4.tar.lz
new file mode 100644
index 0000000..e3ae9c3
--- /dev/null
+++ b/testsuite/t155_fv4.tar.lz
Binary files differ
diff --git a/testsuite/t155_fv5.tar.lz b/testsuite/t155_fv5.tar.lz
new file mode 100644
index 0000000..966015a
--- /dev/null
+++ b/testsuite/t155_fv5.tar.lz
Binary files differ
diff --git a/testsuite/t155_fv6.tar.lz b/testsuite/t155_fv6.tar.lz
new file mode 100644
index 0000000..bc83237
--- /dev/null
+++ b/testsuite/t155_fv6.tar.lz
Binary files differ
diff --git a/testsuite/tar_in_tlz1.tar.lz b/testsuite/tar_in_tlz1.tar.lz
new file mode 100644
index 0000000..bf04f25
--- /dev/null
+++ b/testsuite/tar_in_tlz1.tar.lz
Binary files differ
diff --git a/testsuite/tar_in_tlz2.tar.lz b/testsuite/tar_in_tlz2.tar.lz
new file mode 100644
index 0000000..de8453b
--- /dev/null
+++ b/testsuite/tar_in_tlz2.tar.lz
Binary files differ
diff --git a/testsuite/test.txt b/testsuite/test.txt
new file mode 100644
index 0000000..9196a3a
--- /dev/null
+++ b/testsuite/test.txt
@@ -0,0 +1,676 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) <year> <name of author>
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) <year> <name of author>
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/testsuite/test.txt.lz b/testsuite/test.txt.lz
new file mode 100644
index 0000000..46f98a7
--- /dev/null
+++ b/testsuite/test.txt.lz
Binary files differ
diff --git a/testsuite/test.txt.tar b/testsuite/test.txt.tar
new file mode 100644
index 0000000..d687b43
--- /dev/null
+++ b/testsuite/test.txt.tar
Binary files differ
diff --git a/testsuite/test.txt.tar.lz b/testsuite/test.txt.tar.lz
new file mode 100644
index 0000000..306eeeb
--- /dev/null
+++ b/testsuite/test.txt.tar.lz
Binary files differ
diff --git a/testsuite/test3.tar b/testsuite/test3.tar
new file mode 100644
index 0000000..d58fb45
--- /dev/null
+++ b/testsuite/test3.tar
Binary files differ
diff --git a/testsuite/test3.tar.lz b/testsuite/test3.tar.lz
new file mode 100644
index 0000000..779ace4
--- /dev/null
+++ b/testsuite/test3.tar.lz
Binary files differ
diff --git a/testsuite/test3_bad1.tar b/testsuite/test3_bad1.tar
new file mode 100644
index 0000000..005b6a3
--- /dev/null
+++ b/testsuite/test3_bad1.tar
Binary files differ
diff --git a/testsuite/test3_bad1.tar.lz b/testsuite/test3_bad1.tar.lz
new file mode 100644
index 0000000..9f5d40f
--- /dev/null
+++ b/testsuite/test3_bad1.tar.lz
Binary files differ
diff --git a/testsuite/test3_bad2.tar b/testsuite/test3_bad2.tar
new file mode 100644
index 0000000..d2c546b
--- /dev/null
+++ b/testsuite/test3_bad2.tar
Binary files differ
diff --git a/testsuite/test3_bad2.tar.lz b/testsuite/test3_bad2.tar.lz
new file mode 100644
index 0000000..182c048
--- /dev/null
+++ b/testsuite/test3_bad2.tar.lz
Binary files differ
diff --git a/testsuite/test3_bad3.tar b/testsuite/test3_bad3.tar
new file mode 100644
index 0000000..7d78e87
--- /dev/null
+++ b/testsuite/test3_bad3.tar
Binary files differ
diff --git a/testsuite/test3_bad3.tar.lz b/testsuite/test3_bad3.tar.lz
new file mode 100644
index 0000000..3b46163
--- /dev/null
+++ b/testsuite/test3_bad3.tar.lz
Binary files differ
diff --git a/testsuite/test3_bad4.tar b/testsuite/test3_bad4.tar
new file mode 100644
index 0000000..68312b3
--- /dev/null
+++ b/testsuite/test3_bad4.tar
Binary files differ
diff --git a/testsuite/test3_bad4.tar.lz b/testsuite/test3_bad4.tar.lz
new file mode 100644
index 0000000..7ac6d98
--- /dev/null
+++ b/testsuite/test3_bad4.tar.lz
Binary files differ
diff --git a/testsuite/test3_bad5.tar b/testsuite/test3_bad5.tar
new file mode 100644
index 0000000..e482969
--- /dev/null
+++ b/testsuite/test3_bad5.tar
Binary files differ
diff --git a/testsuite/test3_bad5.tar.lz b/testsuite/test3_bad5.tar.lz
new file mode 100644
index 0000000..5b4feb3
--- /dev/null
+++ b/testsuite/test3_bad5.tar.lz
Binary files differ
diff --git a/testsuite/test3_bad6.tar.lz b/testsuite/test3_bad6.tar.lz
new file mode 100644
index 0000000..42b3888
--- /dev/null
+++ b/testsuite/test3_bad6.tar.lz
Binary files differ
diff --git a/testsuite/test3_dir.tar b/testsuite/test3_dir.tar
new file mode 100644
index 0000000..e0c2b29
--- /dev/null
+++ b/testsuite/test3_dir.tar
Binary files differ
diff --git a/testsuite/test3_dir.tar.lz b/testsuite/test3_dir.tar.lz
new file mode 100644
index 0000000..8eb3f43
--- /dev/null
+++ b/testsuite/test3_dir.tar.lz
Binary files differ
diff --git a/testsuite/test3_dot.tar.lz b/testsuite/test3_dot.tar.lz
new file mode 100644
index 0000000..8fd3d1f
--- /dev/null
+++ b/testsuite/test3_dot.tar.lz
Binary files differ
diff --git a/testsuite/test3_em1.tar.lz b/testsuite/test3_em1.tar.lz
new file mode 100644
index 0000000..0aa8724
--- /dev/null
+++ b/testsuite/test3_em1.tar.lz
Binary files differ
diff --git a/testsuite/test3_em2.tar.lz b/testsuite/test3_em2.tar.lz
new file mode 100644
index 0000000..4fe4e5d
--- /dev/null
+++ b/testsuite/test3_em2.tar.lz
Binary files differ
diff --git a/testsuite/test3_em3.tar.lz b/testsuite/test3_em3.tar.lz
new file mode 100644
index 0000000..49e2eab
--- /dev/null
+++ b/testsuite/test3_em3.tar.lz
Binary files differ
diff --git a/testsuite/test3_em4.tar.lz b/testsuite/test3_em4.tar.lz
new file mode 100644
index 0000000..95df508
--- /dev/null
+++ b/testsuite/test3_em4.tar.lz
Binary files differ
diff --git a/testsuite/test3_em5.tar.lz b/testsuite/test3_em5.tar.lz
new file mode 100644
index 0000000..706beb5
--- /dev/null
+++ b/testsuite/test3_em5.tar.lz
Binary files differ
diff --git a/testsuite/test3_em6.tar.lz b/testsuite/test3_em6.tar.lz
new file mode 100644
index 0000000..806884d
--- /dev/null
+++ b/testsuite/test3_em6.tar.lz
Binary files differ
diff --git a/testsuite/test3_eoa1.tar b/testsuite/test3_eoa1.tar
new file mode 100644
index 0000000..175b807
--- /dev/null
+++ b/testsuite/test3_eoa1.tar
Binary files differ
diff --git a/testsuite/test3_eoa1.tar.lz b/testsuite/test3_eoa1.tar.lz
new file mode 100644
index 0000000..0eb86e4
--- /dev/null
+++ b/testsuite/test3_eoa1.tar.lz
Binary files differ
diff --git a/testsuite/test3_eoa2.tar b/testsuite/test3_eoa2.tar
new file mode 100644
index 0000000..458be1e
--- /dev/null
+++ b/testsuite/test3_eoa2.tar
Binary files differ
diff --git a/testsuite/test3_eoa2.tar.lz b/testsuite/test3_eoa2.tar.lz
new file mode 100644
index 0000000..1f47953
--- /dev/null
+++ b/testsuite/test3_eoa2.tar.lz
Binary files differ
diff --git a/testsuite/test3_eoa3.tar b/testsuite/test3_eoa3.tar
new file mode 100644
index 0000000..3003a93
--- /dev/null
+++ b/testsuite/test3_eoa3.tar
Binary files differ
diff --git a/testsuite/test3_eoa3.tar.lz b/testsuite/test3_eoa3.tar.lz
new file mode 100644
index 0000000..20ba9f8
--- /dev/null
+++ b/testsuite/test3_eoa3.tar.lz
Binary files differ
diff --git a/testsuite/test3_eoa4.tar b/testsuite/test3_eoa4.tar
new file mode 100644
index 0000000..4012fea
--- /dev/null
+++ b/testsuite/test3_eoa4.tar
Binary files differ
diff --git a/testsuite/test3_eoa4.tar.lz b/testsuite/test3_eoa4.tar.lz
new file mode 100644
index 0000000..1593feb
--- /dev/null
+++ b/testsuite/test3_eoa4.tar.lz
Binary files differ
diff --git a/testsuite/test3_eoa5.tar.lz b/testsuite/test3_eoa5.tar.lz
new file mode 100644
index 0000000..156bd3a
--- /dev/null
+++ b/testsuite/test3_eoa5.tar.lz
Binary files differ
diff --git a/testsuite/test3_gh1.tar b/testsuite/test3_gh1.tar
new file mode 100644
index 0000000..f969561
--- /dev/null
+++ b/testsuite/test3_gh1.tar
Binary files differ
diff --git a/testsuite/test3_gh1.tar.lz b/testsuite/test3_gh1.tar.lz
new file mode 100644
index 0000000..d38f46b
--- /dev/null
+++ b/testsuite/test3_gh1.tar.lz
Binary files differ
diff --git a/testsuite/test3_gh2.tar b/testsuite/test3_gh2.tar
new file mode 100644
index 0000000..f5f0c31
--- /dev/null
+++ b/testsuite/test3_gh2.tar
Binary files differ
diff --git a/testsuite/test3_gh2.tar.lz b/testsuite/test3_gh2.tar.lz
new file mode 100644
index 0000000..48f18dd
--- /dev/null
+++ b/testsuite/test3_gh2.tar.lz
Binary files differ
diff --git a/testsuite/test3_gh3.tar b/testsuite/test3_gh3.tar
new file mode 100644
index 0000000..e0d3a9d
--- /dev/null
+++ b/testsuite/test3_gh3.tar
Binary files differ
diff --git a/testsuite/test3_gh3.tar.lz b/testsuite/test3_gh3.tar.lz
new file mode 100644
index 0000000..89a31a6
--- /dev/null
+++ b/testsuite/test3_gh3.tar.lz
Binary files differ
diff --git a/testsuite/test3_gh4.tar b/testsuite/test3_gh4.tar
new file mode 100644
index 0000000..0655c31
--- /dev/null
+++ b/testsuite/test3_gh4.tar
Binary files differ
diff --git a/testsuite/test3_gh4.tar.lz b/testsuite/test3_gh4.tar.lz
new file mode 100644
index 0000000..5b9f605
--- /dev/null
+++ b/testsuite/test3_gh4.tar.lz
Binary files differ
diff --git a/testsuite/test3_gh5.tar.lz b/testsuite/test3_gh5.tar.lz
new file mode 100644
index 0000000..b8f4abe
--- /dev/null
+++ b/testsuite/test3_gh5.tar.lz
Binary files differ
diff --git a/testsuite/test3_gh6.tar.lz b/testsuite/test3_gh6.tar.lz
new file mode 100644
index 0000000..7be9aca
--- /dev/null
+++ b/testsuite/test3_gh6.tar.lz
Binary files differ
diff --git a/testsuite/test3_nn.tar b/testsuite/test3_nn.tar
new file mode 100644
index 0000000..c738dee
--- /dev/null
+++ b/testsuite/test3_nn.tar
Binary files differ
diff --git a/testsuite/test3_nn.tar.lz b/testsuite/test3_nn.tar.lz
new file mode 100644
index 0000000..8f78c1b
--- /dev/null
+++ b/testsuite/test3_nn.tar.lz
Binary files differ
diff --git a/testsuite/test3_sm1.tar.lz b/testsuite/test3_sm1.tar.lz
new file mode 100644
index 0000000..6eb3947
--- /dev/null
+++ b/testsuite/test3_sm1.tar.lz
Binary files differ
diff --git a/testsuite/test3_sm2.tar.lz b/testsuite/test3_sm2.tar.lz
new file mode 100644
index 0000000..f312fcb
--- /dev/null
+++ b/testsuite/test3_sm2.tar.lz
Binary files differ
diff --git a/testsuite/test3_sm3.tar.lz b/testsuite/test3_sm3.tar.lz
new file mode 100644
index 0000000..82ceb18
--- /dev/null
+++ b/testsuite/test3_sm3.tar.lz
Binary files differ
diff --git a/testsuite/test3_sm4.tar.lz b/testsuite/test3_sm4.tar.lz
new file mode 100644
index 0000000..601a640
--- /dev/null
+++ b/testsuite/test3_sm4.tar.lz
Binary files differ
diff --git a/testsuite/test_bad1.txt b/testsuite/test_bad1.txt
new file mode 100644
index 0000000..f8463f4
--- /dev/null
+++ b/testsuite/test_bad1.txt
@@ -0,0 +1,307 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program \ No newline at end of file
diff --git a/testsuite/test_bad1.txt.tar b/testsuite/test_bad1.txt.tar
new file mode 100644
index 0000000..dc2140e
--- /dev/null
+++ b/testsuite/test_bad1.txt.tar
Binary files differ
diff --git a/testsuite/test_bad1.txt.tar.lz b/testsuite/test_bad1.txt.tar.lz
new file mode 100644
index 0000000..afb1e85
--- /dev/null
+++ b/testsuite/test_bad1.txt.tar.lz
Binary files differ
diff --git a/testsuite/test_bad2.txt b/testsuite/test_bad2.txt
new file mode 100644
index 0000000..452408f
--- /dev/null
+++ b/testsuite/test_bad2.txt
@@ -0,0 +1,320 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. Ifodifnperived from the Progr"work based on therogrdifneneraeuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT X FR TO NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT X FR TO NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARQIS NO WARRATHERE IS NO WARRANTY
+FOR THE
+
+ This ee \ No newline at end of file
diff --git a/testsuite/test_bad2.txt.tar.lz b/testsuite/test_bad2.txt.tar.lz
new file mode 100644
index 0000000..598e121
--- /dev/null
+++ b/testsuite/test_bad2.txt.tar.lz
Binary files differ
diff --git a/testsuite/tlz_in_tar1.tar b/testsuite/tlz_in_tar1.tar
new file mode 100644
index 0000000..f2dfd6c
--- /dev/null
+++ b/testsuite/tlz_in_tar1.tar
Binary files differ
diff --git a/testsuite/tlz_in_tar2.tar b/testsuite/tlz_in_tar2.tar
new file mode 100644
index 0000000..be860c6
--- /dev/null
+++ b/testsuite/tlz_in_tar2.tar
Binary files differ
diff --git a/testsuite/ts_in_link.tar.lz b/testsuite/ts_in_link.tar.lz
new file mode 100644
index 0000000..dff816c
--- /dev/null
+++ b/testsuite/ts_in_link.tar.lz
Binary files differ
diff --git a/testsuite/ug32767.tar.lz b/testsuite/ug32767.tar.lz
new file mode 100644
index 0000000..499dc6d
--- /dev/null
+++ b/testsuite/ug32767.tar.lz
Binary files differ
diff --git a/testsuite/ug32chars.tar.lz b/testsuite/ug32chars.tar.lz
new file mode 100644
index 0000000..6c4da26
--- /dev/null
+++ b/testsuite/ug32chars.tar.lz
Binary files differ